Setup

Load necessary packages and clear workspace

library(ggplot2)
library(viridis)
library(lme4)
library(lmerTest)
library(emmeans)
library(knitr)
library(dplyr)
library(drc)
library(aomisc)
library(nlme)
library("ggpubr")
library(broom)
library(gridExtra)
library(gtable)
library(grid)
library(psycho)
rm(list = ls())

Setup plot themes

#Set text size and colors
txt.size = 12
txt.color = "black"
txt.face = "bold"

###define specs for agreement values plots over sample sizes
#agreement value colors
d.color = "#FFC700" #actual data 
random.color = "#00B885" #random data
crossMov.color = "#4D4D4D" #agreement values when actual data compared to cross movie.
factor.order <- c('crossMov', 'random', 'sample')
factor.color <- c(crossMov.color, random.color, d.color)
factor.shape <- c(24, 23, 21) #triangle, diamond, circle
factor.label <- c('Cross-movie', 'Random', 'Same')
#jitter for individual agreement values
jitter.alpha = .1 
jitter.width = 0.2
#point for mean agreement value 
point.size = 5
point.alpha = 1
#errorbar width 
errorbar.width = .8
errorbar.alpha = 1
#elbow related plot 
elbow.size = 8
elbowPos.scale = .01

#define specs for curvature function fittings. 
vir_7 = viridis(n = 7) #load the first 8 viridis colors
line.colors <- c(vir_7[1],vir_7[1], vir_7[2],vir_7[3], vir_7[4], vir_7[5], vir_7[6]) #Linear, Exponential, logarithmic, Asymptotic, Power Curve, Yield loss.
dot.color <- vir_7[7] #color for dotplot 
fit.jitter.alpha = .05
fit.jitter.width = 0.3
fit.line.size = 1.5
fit.line.alpha = 1

###define specs for saving agreement and function fit plots
figure.width = 25
figure.height = 19 
figure.unit = "cm"
figure.bg = "transparent"
figure.filetype = "png"

grain.labs <- c("Coarse", "Fine")
names(grain.labs) <- c("c", "f")

sampleSize = seq(from = 2, to = 32, by = 2)

#Set theme for plotting 
theme.esMethods <- theme(
panel.grid.minor = element_blank(), 
panel.grid.major = element_blank(), 
#panel.background= element_blank(), 
panel.border = element_blank(),
panel.spacing = unit(.05,'in'), 
panel.background = element_rect(fill = "transparent",colour = NA),
plot.background = element_rect(fill = "transparent",colour = NA),
axis.line = element_line(size = .2, colour = "black"), 
axis.title = element_text(size = txt.size),
axis.text = element_text(size = txt.size),
strip.background = element_rect(colour = NA, fill = "transparent"), 
strip.text = element_text(size = txt.size), 
legend.title = element_blank(),
legend.key = element_rect(colour = NA, fill = NA),
plot.title = element_text(hjust = 0.5)
)

Define necessary functions

Function: elbow_finder Adapted from https://stackoverflow.com/questions/2018178/finding-the-best-trade-off-point-on-a-curve/2022348#2022348

Function description: to find the stabilization point (elbow) of change in segmentation agreement based on best fitting growth/ decay functions.

Returns: coordinates of curve that is furthest from the straight line connecting the minimum and maximum points.

Parameter x_values: array of x coordinates of curve

Precondition x_values: numeric

Parameter x_values: array of y coordnates of curve

Precondition y_values: numeric

elbow_finder <- function(x_values, y_values) {
  # Max values to create line
  max_x_x <- max(x_values)
  max_x_y <- y_values[which.max(x_values)]
  min_x_x <- min(x_values)
  min_x_y <- y_values[which.min(x_values)]
  max_df <- data.frame(x = c(min_x_x, max_x_x), y = c(min_x_y, max_x_y))

  # Creating straight line between the max values
  fit <- lm(max_df$y ~ max_df$x)

  # Distance from point to line
  distances <- c()
  for(i in 1:length(x_values)) {
    distances <- c(distances, abs(coef(fit)[2]*x_values[i] - y_values[i] + coef(fit)[1]) / sqrt(coef(fit)[2]^2 + 1^2))
  }

  # Max distance point
  x_max_dist <- x_values[which.max(distances)]
  y_max_dist <- y_values[which.max(distances)]

  return(c(x_max_dist, y_max_dist))
}

Function: functionFits adapted from https://www.statforbiology.com/nonlinearregression/usefulequations

Function description: fit raw agreement values to growth (for peakiness, agreement index, surprise index, detection accuracy and predictive value) or decay (for peak-to-peak distance) functions

Parameter y: array of dependent variable

Precondition y: numeric

Parameter x: array of independent variable

Precondition x: factor (note: this parameter is a factor because it was taken from data frame that’s fed into lmer)

Parameter sampSize: sequence of sample size

Precondition sampSize: numeric

functionFit <- function(y, x, sampSize){
  x <- as.numeric(as.character(x))
  
  linear <- lm(y~x)
  exponential <- tryCatch(drm(y~x, fct = DRC.expoDecay()), error = function(e) drm(y~x, fct = EXD.2()))
  log <- drm(y ~ x, fct = DRC.logCurve())
  asympReg <- drm(y ~ x, fct = DRC.asymReg())
  powerCurve <- tryCatch(drm(y~x, fct = DRC.powerCurve()), error = function(e) drm(y~x, fct = L.4()))
  yl <- nls(y~NLS.YL(x, a, i)) #yielded erratic behavior for normativeHit
  
  models <- c('linear','exponential', 'log', 'asympReg', 'powerCurve', 'yl')
  model.prediction <- data.frame()
  for (model in models){
          assign("prediction", get(model))
          model.prediction <- rbind(model.prediction, data.frame(func = model, sampleSize = x, prediction = predict(prediction)))
  }
  
  bic <- BIC(linear,exponential,log,asympReg,powerCurve, yl)
  aic <- AIC(linear,exponential, log, asympReg, powerCurve, yl)
  
  functionFit.summary <- data.frame(AIC_df = aic$df,
                                    AIC = aic$AIC, 
                                    BIC_df = bic$df,
                                    BIC = bic$BIC)
  
  row.names(functionFit.summary) <- models
  
  mod.predicted <- predict(get(models[match(min(bic$BIC), bic$BIC)]), data.frame(sampSize))#, interval = 'confidence', level = .99
  elbow_pos <- elbow_finder(sampSize, mod.predicted)
  model <- models[match(min(bic$BIC), bic$BIC)]
  
  return(list(model.prediction, functionFit.summary,elbow_pos, model, coef(get(model))[length(coef(get(model)))]))
}

Function: plotIndivAgreement

Function description: function to plot agreement values

Parameter df: data frame containing values for plotting

Precondition df: data frame

Parameter var: variable name for y values (i.e. which agreement measure)

Precondition var: string

Parameter ylab: y-axis label

Precondition ylab: string

Parameter ymin: minimum y axis value

Precondition ymin: numeric

Parameter ymax: maximum y axis value

Precondition ymax: numeric

Parameter title: plot title

Precondition title: string

plotIndivAgreement <- function(df, var, ylab, ymin, ymax, title){
  factors <- factor.order[(length(unique(df$testCond))%%3):3]
  factor.colors <- factor.color[(length(unique(df$testCond))%%3):3]
  factor.shapes <- factor.shape[(length(unique(df$testCond))%%3):3]
  factor.labels <- factor.label[(length(unique(df$testCond))%%3):3]
  transform(df, testCond=factor(testCond,levels=factors))
  jitter_df <- sample_n(df, nrow(df)*.1)#take 10% of datapoint for jitter plot
  elbow = mean(df$elbow)

    figure <- ggplot(df, aes(x = sampleSize, y = get(var, df), fill = testCond, color = testCond, shape = testCond))+
    geom_jitter(data = jitter_df, aes(y = get(var, jitter_df)), alpha = jitter.alpha, width = jitter.width)+
    stat_summary(geom = "errorbar", fun.data = "mean_cl_normal", width = errorbar.width, color = 'black', alpha = errorbar.alpha)+
    geom_point(stat = "summary", fun = "mean", size = point.size, alpha = point.alpha, color = 'black')+
    scale_fill_manual(values = factor.colors, labels = factor.labels)+
    scale_color_manual(values = factor.colors, labels = factor.labels)+
    scale_shape_manual(values = factor.shapes, labels = factor.labels)+
    #scale_x_continuous(breaks = seq(from = 2, to = 32, by = 2))+
    geom_text(color = "black", x = elbow/2, y =  ymin + (elbowPos.scale*(ymax - (ymin))), label = '^', size = elbow.size, show.legend = FALSE)+
    #facet_wrap(~grain, scales='fixed', labeller = labeller(grain = c("c" = "Coarse", "f" = "Fine")))+
    # geom_text(color = 'black', x = 6, y = -.2, label = "+", size = txt.size, show.legend = FALSE)+
    labs(y = ylab, x = "Sample Size", title = title)+
    coord_cartesian(ylim = c(ymin, ymax))+
    #ylim(ymin, ymax)+
    theme.esMethods
    #assign(paste('figure.', g, sep = ''), figure)`
    #ggsave(paste(paste('figure.', g, sep = ''), ".png", sep = ""))
    
    return(figure)
}

Function: plotFunctionFits

Function description: function to plot fitted growth/ decay functions

Parameter df: data frame containing values for plotting (i.e. function fit predictions)

Precondition df: data frame

Parameter original_df: data frame containing raw agremement values (to add as jitter)

Precondition df: data frame

Parameter var: original_df variable name for agreement values (i.e. which agreement measure)

Precondition var: string

Parameter ylab: y-axis label

Precondition ylab: string

Parameter ymin: minimum y axis value

Precondition ymin: numeric

Parameter ymax: maximum y axis value

plotFunctionFits <- function(df, original_df, var, ylab, ymin, ymax){
  figure <- ggplot(df,aes(x = sampleSize, y = prediction, color = func))+
  geom_jitter(data = original_df[original_df$testCond == "sample",], 
              aes(x = as.numeric(as.character(sampleSize)), y = get(var, original_df)), alpha = fit.jitter.alpha, width = fit.jitter.width, fill = dot.color, color = dot.color)+
  geom_line(size = fit.line.size, alpha = fit.line.alpha)+
  scale_color_manual(values = line.colors,
                     labels = c("Linear", "Exponential", "Logarithmic", "Asymptotic", "Power Curve", "Yield Loss"))+
  geom_point(data = original_df[original_df$testCond == "sample",], aes (x = as.numeric(as.character(sampleSize)), y = get(var, original_df)), stat = "summary", fun = "mean", shape = 21, size = 4, color = 'black', fill = dot.color)+
  scale_x_continuous(breaks = c(seq(from =2, to = 32, by = 2)))+
  coord_cartesian(ylim = c(ymin, ymax))+
  #ylim(ymin,ymax)+
  labs(y = ylab, x = "Sample size")+
  #facet_wrap(~grain, scales='fixed', labeller = labeller(grain = c("c" = "Coarse", "f" = "Fine")))+
  theme.esMethods
  
  return(figure)
}

Analyse data

Peakiness

Load dataset and remove outliers for evAct peakiness (value > 20*sd)

setwd('../data/bootstrapped')
peakiness.comm <- read.delim("esMethods_Peakiness_adjust_c0.1_f0.05_1000Iterations_commercial.txt", head = TRUE)
#peakiness.evAct <- peakiness.evAct[peakiness.comm$peakiness < (mean(peakiness.comm$peakiness) + (20*sd(peakiness.comm$peakiness))),]
peakiness.comm$sampleSize <- as.factor(peakiness.comm$sampleSize)
peakiness.comm$logPeakiness <- log(peakiness.comm$peakiness, base = 10)

peakiness.evAct <- read.delim("esMethods_Peakiness_adjust_c0.1_f0.05_1000Iterations_evAct.txt", head = TRUE)
#peakiness.evAct <- peakiness.evAct[peakiness.evAct$peakiness < (mean(peakiness.evAct$peakiness) + (20*sd(peakiness.evAct$peakiness))),]
peakiness.evAct$sampleSize <- as.factor(peakiness.evAct$sampleSize)
peakiness.evAct$logPeakiness <- log(peakiness.evAct$peakiness, base = 10)

Fit peakiness values to growth and decay functions

##commercial 
peakiness.fit.coarse.comm <- functionFit(peakiness.comm$peakiness[peakiness.comm$grain == "c" & peakiness.comm$testCond == 'sample'], peakiness.comm$sampleSize[peakiness.comm$grain == "c" & peakiness.comm$testCond == 'sample'], sampleSize)
peakiness.fit.fine.comm <- functionFit(peakiness.comm$peakiness[peakiness.comm$grain == "f" & peakiness.comm$testCond == 'sample'], peakiness.comm$sampleSize[peakiness.comm$grain == "f" & peakiness.comm$testCond == 'sample'], sampleSize)

peakiness.functionFit <- peakiness.fit.coarse.comm[[1]] %>% mutate(grain = "c")
peakiness.functionFit <- rbind(peakiness.functionFit, peakiness.fit.fine.comm[[1]] %>% mutate(grain = "f"))
peakiness.functionFit$grain <- as.factor(peakiness.functionFit$grain)

#peakiness.functionFit.comm.figure <- plotFunctionFits(peakiness.functionFit, peakiness.comm[peakiness.comm$testCond == "sample",], "peakiness", "Peakiness (raw)")

#add elbow value to main dataframe 
for (i in 1:nrow(peakiness.comm)){
  if(peakiness.comm$grain[i] == 'c'){
    peakiness.comm$elbow[i] = 0 #coarse commercial peakiness did not stabilize
  } else {
    peakiness.comm$elbow[i] = peakiness.fit.fine.comm[[3]][1]
  }
}

##everyday Activities 
peakiness.fit.coarse.evAct <- functionFit(peakiness.evAct$peakiness[peakiness.evAct$grain == "c" & peakiness.evAct$testCond == "sample"], peakiness.evAct$sampleSize[peakiness.evAct$grain == 'c' & peakiness.evAct$testCond == "sample"], sampleSize)
peakiness.fit.fine.evAct <- functionFit(peakiness.evAct$peakiness[peakiness.evAct$grain == "f" & peakiness.evAct$testCond == "sample"], peakiness.evAct$sampleSize[peakiness.evAct$grain == 'f' & peakiness.evAct$testCond == "sample"], sampleSize)

peakiness.evAct.functionFit <- peakiness.fit.coarse.evAct[[1]] %>% mutate(grain = "c")
peakiness.evAct.functionFit <- rbind(peakiness.evAct.functionFit, peakiness.fit.fine.evAct[[1]] %>% mutate(grain = "f"))
peakiness.evAct.functionFit$grain <- as.factor(peakiness.evAct.functionFit$grain)

#peakiness.functionFit.evAct.figure <- plotFunctionFits(peakiness.evAct.functionFit, peakiness.evAct[peakiness.evAct$testCond == "sample",], "peakiness", "Peakiness (raw)") + ylim(0,50)

#add elbow value to main dataframe 
for (i in 1:nrow(peakiness.evAct)){
  if(peakiness.evAct$grain[i] == 'c'){
    peakiness.evAct$elbow[i] = 0 #coarse evAct peakiness did not stabilize
  } else {
    peakiness.evAct$elbow[i] = 0 #fine evAct peakiness did not stabilize
  }
}

Plot peakiness over sample size

peakiness.comm.coarse.plot <- plotIndivAgreement(peakiness.comm[peakiness.comm$grain == 'c',], "logPeakiness", "Peakiness (log)", 0, 2.5, "Coarse")
peakiness.comm.fine.plot <- plotIndivAgreement(peakiness.comm[peakiness.comm$grain == 'f',],"logPeakiness", "Peakiness (log)", 0, 2.5, "Fine")

peakiness.evAct.coarse.plot <- plotIndivAgreement(peakiness.evAct[peakiness.evAct$grain == 'c', ], "logPeakiness", "Peakiness (log)", 0, 2.5, "")
peakiness.evAct.fine.plot <- plotIndivAgreement(peakiness.evAct[peakiness.evAct$grain == 'f', ],  "logPeakiness", "Peakiness (log)", 0, 2.5, "")

peakiness.plot.combined <- ggarrange(peakiness.comm.coarse.plot, peakiness.comm.fine.plot,
                                      peakiness.evAct.coarse.plot, peakiness.evAct.fine.plot,
                                       nrow = 2, ncol = 2,
                                       labels = c("A","", "B", ""),
                                       common.legend = TRUE, legend = "bottom",
                                       align = "hv")
peakiness.plot.combined

setwd('../plots')
ggsave("esMethod_peakiness.pdf", device = cairo_pdf, width = 25, height = 19, units = "cm", bg = "transparent")
ggsave(plot = peakiness.plot.combined, file = "esMethod_peakiness.png", 
        type = "cairo-png",  bg = "white",
        width = 25, height = 19, units = "cm", dpi = 300)

Plot peakiness function fits

peakiness.comm.coarse.funcPlot <- plotFunctionFits(peakiness.functionFit[peakiness.functionFit$grain == 'c',], peakiness.comm[peakiness.comm$grain == 'c' & peakiness.comm$testCond == 'sample',], "peakiness", "Peakiness", 2, 50)
peakiness.comm.fine.funcPlot <- plotFunctionFits(peakiness.functionFit[peakiness.functionFit$grain == 'f',], peakiness.comm[peakiness.comm$grain == 'f' & peakiness.comm$testCond == 'sample',], "peakiness", "Peakiness", 2, 50)

peakiness.evAct.coarse.funcPlot <- plotFunctionFits(peakiness.evAct.functionFit[peakiness.evAct.functionFit$grain == 'c',], peakiness.evAct[peakiness.evAct$grain == 'c' & peakiness.evAct$testCond == 'sample',], "peakiness", "Peakiness", 2, 60)
peakiness.evAct.fine.funcPlot <- plotFunctionFits(peakiness.evAct.functionFit[peakiness.evAct.functionFit$grain == 'f',], peakiness.evAct[peakiness.evAct$grain == 'f' & peakiness.evAct$testCond == 'sample',], "peakiness", "Peakiness", 2, 60)

peakiness.funcFit.plot.combined <- ggarrange(peakiness.comm.coarse.funcPlot, peakiness.comm.fine.funcPlot,
                                      peakiness.evAct.coarse.funcPlot, peakiness.evAct.fine.funcPlot,
                                       nrow = 2, ncol = 2,
                                       labels = c("A","", "B", ""),
                                       common.legend = TRUE, legend = "bottom",
                                       align = "hv")

peakiness.funcFit.plot.combined

setwd('../plots')
ggsave("esMethod_peakiness_functionFit.pdf", device = cairo_pdf, width = 25, height = 19, units = "cm", bg = "transparent")

Peakiness linear models and analysis

peakiness.comm.lmer <- lmer(peakiness~sampleSize*grain*testCond + (1|movieName), data = peakiness.comm)
#emm_options(pbkrtest.limit = 12800)
peakiness.comm.samp_vs_rand <- summary(emmeans(peakiness.comm.lmer, c("testCond"), by = "grain", contr = list(c1 = c(-1, 1))), infer = T)
peakiness.comm.pairwise <- summary(emmeans(peakiness.comm.lmer, "testCond",by = c("sampleSize", "grain"), weights = "proportional", contr = "revpairwise", adjust = "none")$contrasts, infer = TRUE)#random vs sample pairwise/ sample size

peakiness.comm.poly <- summary(contrast(emmeans(peakiness.comm.lmer,'sampleSize', by = c( 'grain', 'testCond')), 'poly', max.degree = 3), infer = TRUE)#Polynomial contrast

peakiness.evAct.lmer <- lmer(peakiness~sampleSize*grain*testCond + (1|movieName), data = peakiness.evAct)
#emm_options(pbkrtest.limit = 25595)
peakiness.evAct.samp_vs_rand <- summary(emmeans(peakiness.evAct.lmer, c("testCond"), by = "grain", contr = list(c1 = c(-1, 1))), infer = T)# overall random vs sample 
peakiness.evAct.pairwise <- summary(emmeans(peakiness.evAct.lmer, "testCond",by = c("sampleSize", "grain"), weights = "proportional", contr = "revpairwise", adjust = "none")$contrasts, infer = TRUE)#pairwise comparison 
peakiness.evAct.poly <- summary(contrast(emmeans(peakiness.evAct.lmer,'sampleSize', by = c( 'grain', 'testCond')), 'poly', max.degree = 3), infer = TRUE)#Polynomial contrast

Peakiness Sample - random contrast commercial-lab

peakiness.comm.samp_vs_rand$contrasts
## grain = c:
##  contrast estimate         SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       2.813774 0.01117545 Inf  2.791871  2.835678 251.782  <.0001
## 
## grain = f:
##  contrast estimate         SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       4.530814 0.01117545 Inf  4.508910  4.552717 405.426  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Peakiness sample-random contrast everyday-online

peakiness.evAct.samp_vs_rand$contrasts
## grain = c:
##  contrast  estimate        SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       14.788739 0.2991623 Inf 14.202392 15.375087  49.434  <.0001
## 
## grain = f:
##  contrast  estimate        SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1        5.503681 0.2991623 Inf  4.917333  6.090028  18.397  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Peakiness smallest pairwise effect commercial-lab

peakiness.comm.pairwise %>% dplyr::filter(p.value < .05, z.ratio > 0) %>% dplyr::group_by(grain) %>% filter(z.ratio == min(z.ratio))
## # A tibble: 2 x 10
## # Groups:   grain [2]
##   contrast    sampleSize grain estimate     SE    df asymp.LCL asymp.UCL z.ratio
##   <fct>       <fct>      <fct>    <dbl>  <dbl> <dbl>     <dbl>     <dbl>   <dbl>
## 1 sample - r… 6          c       0.0944 0.0447   Inf   0.00679     0.182    2.11
## 2 sample - r… 6          f       0.832  0.0447   Inf   0.744       0.920   18.6 
## # … with 1 more variable: p.value <dbl>

Peakiness smallest pairwise effect everyday-online

peakiness.evAct.pairwise %>% dplyr::filter(p.value < .05, z.ratio > 0) %>% dplyr::group_by(grain) %>% filter(z.ratio == min(z.ratio))
## # A tibble: 2 x 10
## # Groups:   grain [2]
##   contrast     sampleSize grain estimate    SE    df asymp.LCL asymp.UCL z.ratio
##   <fct>        <fct>      <fct>    <dbl> <dbl> <dbl>     <dbl>     <dbl>   <dbl>
## 1 sample - ra… 4          c         6.92  1.20   Inf     4.57       9.26    5.78
## 2 sample - ra… 4          f         2.56  1.20   Inf     0.219      4.91    2.14
## # … with 1 more variable: p.value <dbl>

Peakiness polynomial fit commercial-lab

peakiness.comm.poly[peakiness.comm.poly$testCond == "sample",]
##     contrast grain testCond   estimate        SE  df  asymp.LCL  asymp.UCL
## 7     linear     c   sample  227.62551  1.165680 Inf  225.34082  229.91020
## 8  quadratic     c   sample   18.87373  2.388934 Inf   14.19151   23.55596
## 9      cubic     c   sample  -49.40573 31.731351 Inf -111.59803   12.78658
## 10    linear     f   sample  284.05796  1.165680 Inf  281.77327  286.34265
## 11 quadratic     f   sample  -64.98128  2.388934 Inf  -69.66350  -60.29905
## 12     cubic     f   sample -402.62671 31.731351 Inf -464.81902 -340.43440
##       z.ratio       p.value
## 7  195.272673  0.000000e+00
## 8    7.900485  2.778211e-15
## 9   -1.557000  1.194704e-01
## 10 243.684270  0.000000e+00
## 11 -27.200954 6.328278e-163
## 12 -12.688609  6.838959e-37

Peakiness polynomial fit commercial-lab

peakiness.evAct.poly[peakiness.evAct.poly$testCond == "sample",]
##     contrast grain testCond   estimate       SE  df   asymp.LCL  asymp.UCL
## 7     linear     c   sample  184.71152  31.2048 Inf   123.55124  245.87179
## 8  quadratic     c   sample   55.46601  63.9508 Inf   -69.87526  180.80728
## 9      cubic     c   sample -865.08418 849.4356 Inf -2529.94734  799.77898
## 10    linear     f   sample  127.87095  31.2048 Inf    66.71067  189.03122
## 11 quadratic     f   sample  -63.30634  63.9508 Inf  -188.64761   62.03493
## 12     cubic     f   sample  235.44441 849.4356 Inf -1429.41875 1900.30757
##       z.ratio      p.value
## 7   5.9193310 3.232539e-09
## 8   0.8673231 3.857650e-01
## 9  -1.0184223 3.084773e-01
## 10  4.0977979 4.170992e-05
## 11 -0.9899225 3.222120e-01
## 12  0.2771775 7.816438e-01

Peak-to-peak distance

Load dataset

setwd('../data/bootstrapped')
peaktopeak.comm <- read.delim("esMethods_PeakPeakDistance_c0.1_f0.05_1000Iterations_commercial.txt", head = TRUE)
peaktopeak.comm$sampleSize <- as.factor(peaktopeak.comm$sampleSize)
peaktopeak.comm$logPeaktopeak <- log(peaktopeak.comm$nearestDistance, base = 10)

peaktopeak.evAct <- read.delim("esMethods_PeakPeakDistance_c0.1_f0.05_1000Iterations_evAct.txt", head = TRUE)
peaktopeak.evAct$sampleSize <- as.factor(peaktopeak.evAct$sampleSize)
peaktopeak.evAct$logPeaktopeak <- log(peaktopeak.evAct$nearestDistance, base = 10)

Fit peak-to-peak values to growth and decay functions

##commercial 
peaktopeak.fit.coarse.comm <- functionFit(peaktopeak.comm$nearestDistance[peaktopeak.comm$grain == "c" & peaktopeak.comm$testCond == 'sample'], peaktopeak.comm$sampleSize[peaktopeak.comm$grain == "c" & peaktopeak.comm$testCond == 'sample'], sampleSize)
peaktopeak.fit.fine.comm <- functionFit(peaktopeak.comm$nearestDistance[peaktopeak.comm$grain == "f" & peaktopeak.comm$testCond == 'sample'], peaktopeak.comm$sampleSize[peaktopeak.comm$grain == "f" & peaktopeak.comm$testCond == 'sample'], sampleSize)

peaktopeak.functionFit <- peaktopeak.fit.coarse.comm[[1]] %>% mutate(grain = "c")
peaktopeak.functionFit <- rbind(peaktopeak.functionFit, peaktopeak.fit.fine.comm[[1]] %>% mutate(grain = "f"))
peaktopeak.functionFit$grain <- as.factor(peaktopeak.functionFit$grain)

#peaktopeak.functionFit.comm.figure <- plotFunctionFits(peaktopeak.functionFit, peaktopeak.comm[peaktopeak.comm$testCond == "sample",], "nearestDistance", "Peak-to-peak distance (s)")

for(i in 1:nrow(peaktopeak.comm)){
  if(peaktopeak.comm$grain[i] == 'c'){
    peaktopeak.comm$elbow[i] = peaktopeak.fit.coarse.comm[[3]][1]
  }else{
    peaktopeak.comm$elbow[i] = peaktopeak.fit.fine.comm[[3]][1]
  }
}

##everyday Activities 
peaktopeak.fit.coarse.evAct <- functionFit(peaktopeak.evAct$nearestDistance[peaktopeak.evAct$grain == "c" & peaktopeak.evAct$testCond == "sample"], peaktopeak.evAct$sampleSize[peaktopeak.evAct$grain == 'c' & peaktopeak.evAct$testCond == "sample"], sampleSize)
peaktopeak.fit.fine.evAct <- functionFit(peaktopeak.evAct$nearestDistance[peaktopeak.evAct$grain == "f" & peaktopeak.evAct$testCond == "sample"], peaktopeak.evAct$sampleSize[peaktopeak.evAct$grain == 'f' & peaktopeak.evAct$testCond == "sample"], sampleSize)

peaktopeak.evAct.functionFit <- peaktopeak.fit.coarse.evAct[[1]] %>% mutate(grain = "c")
peaktopeak.evAct.functionFit <- rbind(peaktopeak.evAct.functionFit, peaktopeak.fit.fine.evAct[[1]] %>% mutate(grain = "f"))
peaktopeak.evAct.functionFit$grain <- as.factor(peaktopeak.evAct.functionFit$grain)

#peaktopeak.functionFit.evAct.figure <- plotFunctionFits(peaktopeak.evAct.functionFit, peaktopeak.evAct[peaktopeak.evAct$testCond == "sample",], "nearestDistance", "Peak-to-peak distance (s)") + ylim(0,50)

for(i in 1:nrow(peaktopeak.evAct)){
  if(peaktopeak.evAct$grain[i] == 'c'){
    peaktopeak.evAct$elbow[i] = peaktopeak.fit.coarse.evAct[[3]][1]
  }else{
    peaktopeak.evAct$elbow[i] = peaktopeak.fit.fine.evAct[[3]][1]
  }
}

Plot peak-to-peak over sample size

Log10 transformed peak-to-peak distance over increasing sample sizes for segmentation of: A) commercial-lab and B) everyday-online.

peaktopeak.comm.coarse.plot <- plotIndivAgreement(peaktopeak.comm[peaktopeak.comm$grain == 'c',], "logPeaktopeak", "Peak to peak distance (log)", 0, 2, "Coarse")
peaktopeak.comm.fine.plot <- plotIndivAgreement(peaktopeak.comm[peaktopeak.comm$grain == 'f',], "logPeaktopeak", "Peak to peak distance (log)", -0.5, 1.5, "Fine")

peaktopeak.evAct.coarse.plot <- plotIndivAgreement(peaktopeak.evAct[peaktopeak.evAct$grain == 'c', ], "logPeaktopeak", "Peak to peak distance (log)", 0, 2, "")
peaktopeak.evAct.fine.plot <- plotIndivAgreement(peaktopeak.evAct[peaktopeak.evAct$grain == 'f', ], "logPeaktopeak", "Peak to peak distance (log)", -0.5, 1.5, "")

peaktopeak.plot.combined <- ggarrange(peaktopeak.comm.coarse.plot, peaktopeak.comm.fine.plot,
                                      peaktopeak.evAct.coarse.plot, peaktopeak.evAct.fine.plot,
                                       nrow = 2, ncol = 2,
                                       labels = c("A","", "B", ""),
                                       common.legend = TRUE, legend = "bottom",
                                       align = "hv")

peaktopeak.plot.combined

setwd('../plots')
ggsave("esMethod_peaktopeak.pdf", device = cairo_pdf, width = 25, height = 19, units = "cm", bg = "transparent", dpi = 300)
ggsave(plot = peaktopeak.plot.combined, file = "esMethod_peaktopeak.png", 
        type = "cairo-png",  bg = "white",
        width = 25, height = 19, units = "cm", dpi = 300)

Plot peak-to-peak function fits

Peak-to-peak values fit to various growth and decay functions over increasing sample sizes for segmentation of: A) commercial-lab and B) everyday-online.

#df, original_df, var, ylab, ymin, ymax, asymptote
peaktopeak.comm.coarse.funcPlot <- plotFunctionFits(peaktopeak.functionFit[peaktopeak.functionFit$grain == 'c',], peaktopeak.comm[peaktopeak.comm$grain == 'c' & peaktopeak.comm$testCond == 'sample',], "nearestDistance", "Peak-to-peak distance (s)", 0, 15)
peaktopeak.comm.fine.funcPlot <- plotFunctionFits(peaktopeak.functionFit[peaktopeak.functionFit$grain == 'f',], peaktopeak.comm[peaktopeak.comm$grain == 'f' & peaktopeak.comm$testCond == 'sample',], "nearestDistance", "Peak-to-peak distance (s)", 0, 10)

peaktopeak.evAct.coarse.funcPlot <- plotFunctionFits(peaktopeak.evAct.functionFit[peaktopeak.evAct.functionFit$grain == 'c',], peaktopeak.evAct[peaktopeak.evAct$grain == 'c' & peaktopeak.evAct$testCond == 'sample',], "nearestDistance", "Peak-to-peak distance (s)", 0, 25)
peaktopeak.evAct.fine.funcPlot <- plotFunctionFits(peaktopeak.evAct.functionFit[peaktopeak.evAct.functionFit$grain == 'f',], peaktopeak.evAct[peaktopeak.evAct$grain == 'f' & peaktopeak.evAct$testCond == 'sample',], "nearestDistance", "Peak-to-peak distance (s)", 0, 5)

peaktopeak.funcFit.plot.combined <- ggarrange(peaktopeak.comm.coarse.funcPlot, peaktopeak.comm.fine.funcPlot,
                                      peaktopeak.evAct.coarse.funcPlot, peaktopeak.evAct.fine.funcPlot,
                                       nrow = 2, ncol = 2,
                                       labels = c("A","", "B", ""),
                                       common.legend = TRUE, legend = "bottom",
                                       align = "hv")

peaktopeak.funcFit.plot.combined

setwd('../plots')
ggsave("esMethod_peaktopeak_functionFit.pdf", device = cairo_pdf, width = 25, height = 19, units = "cm", bg = "transparent")

Peak-to-peak linear models and analysis

#commercial-lab 
peaktopeak.comm.lmer <- lmer(nearestDistance~sampleSize*grain*testCond + (1|movieName), data = peaktopeak.comm)
#emm_options(pbkrtest.limit = 19200)
peaktopeak.comm.poly <- summary(contrast(emmeans(peaktopeak.comm.lmer,'sampleSize', by = c( 'grain', 'testCond')), 'poly', max.degree = 3), infer = TRUE)#Polynomial contrast
peaktopeak.comm.rand_vs_samp <- summary(emmeans(peaktopeak.comm.lmer, c("testCond"), by = "grain", contr = list(c1 = c(0, 1, -1))), infer = T)# overall random - sample 
peaktopeak.comm.crossMov_vs_samp <- summary(emmeans(peaktopeak.comm.lmer, c("testCond"), by = "grain", contr = list(c1 = c(1, 0, -1))), infer = T)# overall crossMov - sample 
peaktopeak.comm.crossMov_vs_rand <- summary(emmeans(peaktopeak.comm.lmer, c("testCond"), by = "grain", contr = list(c1 = c(1, -1, 0))), infer = T)# overall crossMov - random 

peaktopeak.comm.pairwise <- summary(emmeans(peaktopeak.comm.lmer, "testCond",by = c("sampleSize", "grain"), weights = "proportional", contr = "pairwise", adjust = "none")$contrasts, infer = TRUE)#random vs sample pairwise/ sample size

#everyday-online
peaktopeak.evAct.lmer <- lmer(nearestDistance~sampleSize*grain*testCond + (1|movieName), data = peaktopeak.evAct)
#emm_options(pbkrtest.limit = 38400)
peaktopeak.evAct.poly <- summary(contrast(emmeans(peaktopeak.evAct.lmer,'sampleSize', by = c( 'grain', 'testCond')), 'poly', max.degree = 3), infer = TRUE)#Polynomial contrast
peaktopeak.evAct.rand_vs_samp <- summary(emmeans(peaktopeak.evAct.lmer, c("testCond"), by = "grain", contr = list(c1 = c(0, 1, -1))), infer = T)# overall random - sample 
peaktopeak.evAct.crossMov_vs_samp <- summary(emmeans(peaktopeak.evAct.lmer, c("testCond"), by = "grain", contr = list(c1 = c(1, 0, -1))), infer = T)# overall crossMov - sample 
peaktopeak.evAct.crossMov_vs_rand <- summary(emmeans(peaktopeak.evAct.lmer, c("testCond"), by = "grain", contr = list(c1 = c(1, -1, 0))), infer = T)# overall crossMov - random 

peaktopeak.evAct.pairwise <- summary(emmeans(peaktopeak.evAct.lmer, "testCond",by = c("sampleSize", "grain"), weights = "proportional", contr = "pairwise", adjust = "none")$contrasts, infer = TRUE)#random vs sample pairwise/ sample size

Peak-to-peak random - sample contrast commercial-lab

peaktopeak.comm.rand_vs_samp$contrasts
## grain = c:
##  contrast estimate         SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       4.413576 0.01067866 Inf  4.392647  4.434506 413.308  <.0001
## 
## grain = f:
##  contrast estimate         SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       1.150112 0.01067866 Inf  1.129183  1.171042 107.702  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Peak-to-peak random - sample contrast everyday-online

peaktopeak.evAct.rand_vs_samp$contrasts
## grain = c:
##  contrast  estimate         SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       6.9048670 0.01837269 Inf 6.8688572 6.9408768 375.822  <.0001
## 
## grain = f:
##  contrast  estimate         SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       0.7601504 0.01837269 Inf 0.7241406 0.7961602  41.374  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Peak-to-peak cross movie - sample contrast commercial-lab

peaktopeak.comm.crossMov_vs_samp$contrasts
## grain = c:
##  contrast estimate         SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       4.894355 0.01067866 Inf  4.873426  4.915285 458.331  <.0001
## 
## grain = f:
##  contrast estimate         SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       1.104360 0.01067866 Inf  1.083430  1.125290 103.418  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Peak-to-peak cross movie - sample contrast everyday-online

peaktopeak.evAct.crossMov_vs_samp$contrasts
## grain = c:
##  contrast  estimate         SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       9.2249392 0.01837269 Inf 9.1889294  9.260949 502.101  <.0001
## 
## grain = f:
##  contrast  estimate         SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       0.9943129 0.01837269 Inf 0.9583031  1.030323  54.119  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Peak-to-peak cross movie - random contrast commercial-lab

peaktopeak.comm.crossMov_vs_rand$contrasts
## grain = c:
##  contrast    estimate         SE  df   asymp.LCL   asymp.UCL z.ratio p.value
##  c1        0.48077910 0.01067866 Inf  0.45984932  0.50170888  45.022  <.0001
## 
## grain = f:
##  contrast    estimate         SE  df   asymp.LCL   asymp.UCL z.ratio p.value
##  c1       -0.04575247 0.01067866 Inf -0.06668225 -0.02482269  -4.284  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Peak-to-peak cross movie - random contrast everyday-online

peaktopeak.evAct.crossMov_vs_rand$contrasts
## grain = c:
##  contrast  estimate         SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       2.3200722 0.01837269 Inf 2.2840624 2.3560820 126.278  <.0001
## 
## grain = f:
##  contrast  estimate         SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       0.2341625 0.01837269 Inf 0.1981527 0.2701723  12.745  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Peak-to-peak smallest sample size for significant pairwise effect commercial-lab

peaktopeak.comm.pairwise %>% dplyr::filter(p.value < .05, z.ratio > 0) %>% dplyr::group_by(grain, contrast) %>% filter(sampleSize == min(as.numeric(as.character(sampleSize))))
## # A tibble: 5 x 10
## # Groups:   grain, contrast [5]
##   contrast    sampleSize grain estimate     SE    df asymp.LCL asymp.UCL z.ratio
##   <fct>       <fct>      <fct>    <dbl>  <dbl> <dbl>     <dbl>     <dbl>   <dbl>
## 1 crossMov -… 2          c        2.77  0.0427   Inf     2.69      2.86     64.9
## 2 random - s… 2          c        2.97  0.0427   Inf     2.89      3.06     69.6
## 3 crossMov -… 4          c        0.758 0.0427   Inf     0.674     0.842    17.7
## 4 crossMov -… 2          f        0.548 0.0427   Inf     0.464     0.632    12.8
## 5 random - s… 2          f        0.947 0.0427   Inf     0.864     1.03     22.2
## # … with 1 more variable: p.value <dbl>

Peak-to-peak smallest sample size for significant pairwise effect everyday-online

peaktopeak.evAct.pairwise %>% dplyr::filter(p.value < .05, z.ratio > 0) %>% dplyr::group_by(grain, contrast) %>% filter(sampleSize == min(as.numeric(as.character(sampleSize))))
## # A tibble: 6 x 10
## # Groups:   grain, contrast [6]
##   contrast    sampleSize grain estimate     SE    df asymp.LCL asymp.UCL z.ratio
##   <fct>       <fct>      <fct>    <dbl>  <dbl> <dbl>     <dbl>     <dbl>   <dbl>
## 1 crossMov -… 2          c        0.696 0.0735   Inf    0.552      0.840    9.47
## 2 crossMov -… 2          c        5.97  0.0735   Inf    5.83       6.12    81.3 
## 3 random - s… 2          c        5.28  0.0735   Inf    5.13       5.42    71.8 
## 4 crossMov -… 2          f        0.678 0.0735   Inf    0.534      0.822    9.23
## 5 random - s… 2          f        0.719 0.0735   Inf    0.575      0.863    9.79
## 6 crossMov -… 10         f        0.168 0.0735   Inf    0.0236     0.312    2.28
## # … with 1 more variable: p.value <dbl>

Peak-to-peak smallest pairwise effect commercial-lab

peaktopeak.comm.pairwise %>% dplyr::filter(p.value < .05, z.ratio > 0) %>% dplyr::group_by(grain, contrast) %>% filter(z.ratio == min(z.ratio))
## # A tibble: 5 x 10
## # Groups:   grain, contrast [5]
##   contrast    sampleSize grain estimate     SE    df asymp.LCL asymp.UCL z.ratio
##   <fct>       <fct>      <fct>    <dbl>  <dbl> <dbl>     <dbl>     <dbl>   <dbl>
## 1 crossMov -… 2          c        2.77  0.0427   Inf     2.69      2.86    64.9 
## 2 random - s… 4          c        2.72  0.0427   Inf     2.64      2.81    63.8 
## 3 crossMov -… 12         c        0.199 0.0427   Inf     0.115     0.283    4.66
## 4 crossMov -… 2          f        0.548 0.0427   Inf     0.464     0.632   12.8 
## 5 random - s… 10         f        0.900 0.0427   Inf     0.816     0.983   21.1 
## # … with 1 more variable: p.value <dbl>

Peak-to-peak smallest pairwise effect everyday-online coarse

peaktopeak.evAct.pairwise %>% dplyr::filter(p.value < .05, z.ratio > 0) %>% dplyr::group_by(grain, contrast) %>% filter(z.ratio == min(z.ratio))
## # A tibble: 6 x 10
## # Groups:   grain, contrast [6]
##   contrast    sampleSize grain estimate     SE    df asymp.LCL asymp.UCL z.ratio
##   <fct>       <fct>      <fct>    <dbl>  <dbl> <dbl>     <dbl>     <dbl>   <dbl>
## 1 crossMov -… 2          c        0.696 0.0735   Inf    0.552      0.840    9.47
## 2 crossMov -… 2          c        5.97  0.0735   Inf    5.83       6.12    81.3 
## 3 random - s… 4          c        5.00  0.0735   Inf    4.85       5.14    68.0 
## 4 crossMov -… 4          f        0.616 0.0735   Inf    0.472      0.760    8.38
## 5 random - s… 4          f        0.569 0.0735   Inf    0.425      0.713    7.74
## 6 crossMov -… 10         f        0.168 0.0735   Inf    0.0236     0.312    2.28
## # … with 1 more variable: p.value <dbl>

Peak-to-peak largest pairwise effect everyday-online fine (note: non significant effect)

peaktopeak.evAct.pairwise %>% dplyr::filter(p.value > .05, z.ratio > 0) %>% dplyr::group_by(grain, contrast) %>% filter(z.ratio == max(z.ratio))
## # A tibble: 1 x 10
## # Groups:   grain, contrast [1]
##   contrast    sampleSize grain estimate     SE    df asymp.LCL asymp.UCL z.ratio
##   <fct>       <fct>      <fct>    <dbl>  <dbl> <dbl>     <dbl>     <dbl>   <dbl>
## 1 crossMov -… 8          f        0.142 0.0735   Inf  -0.00191     0.286    1.93
## # … with 1 more variable: p.value <dbl>

Peak-to-peak polynomial fit commercial-lab

peaktopeak.comm.poly[peaktopeak.comm.poly$testCond == "sample",]
##     contrast grain testCond    estimate        SE  df   asymp.LCL   asymp.UCL
## 13    linear     c   sample  -221.63346  1.113861 Inf  -223.81659  -219.45033
## 14 quadratic     c   sample   268.04856  2.282736 Inf   263.57448   272.52264
## 15     cubic     c   sample -2206.29730 30.320762 Inf -2265.72490 -2146.86970
## 16    linear     f   sample   -72.01552  1.113861 Inf   -74.19865   -69.83239
## 17 quadratic     f   sample    87.57899  2.282736 Inf    83.10491    92.05307
## 18     cubic     f   sample  -740.36656 30.320762 Inf  -799.79416  -680.93896
##       z.ratio       p.value
## 13 -198.97766  0.000000e+00
## 14  117.42426  0.000000e+00
## 15  -72.76523  0.000000e+00
## 16  -64.65395  0.000000e+00
## 17   38.36580  0.000000e+00
## 18  -24.41781 1.106489e-131

Peak-to-peak polynomial fit everyday-online

peaktopeak.evAct.poly[peaktopeak.evAct.poly$testCond == "sample",]
##     contrast grain testCond    estimate        SE  df   asymp.LCL   asymp.UCL
## 13    linear     c   sample  -215.25030  1.916405 Inf  -219.00638  -211.49421
## 14 quadratic     c   sample   218.83559  3.927461 Inf   211.13791   226.53327
## 15     cubic     c   sample -1791.00420 52.167063 Inf -1893.24977 -1688.75864
## 16    linear     f   sample   -43.80259  1.916405 Inf   -47.55868   -40.04651
## 17 quadratic     f   sample    52.48384  3.927461 Inf    44.78616    60.18152
## 18     cubic     f   sample  -452.69066 52.167063 Inf  -554.93623  -350.44510
##       z.ratio       p.value
## 13 -112.31984  0.000000e+00
## 14   55.71935  0.000000e+00
## 15  -34.33209 2.606724e-258
## 16  -22.85665 1.255063e-115
## 17   13.36330  9.907476e-41
## 18   -8.67771  4.038156e-18

Agreement Index

Load dataset

setwd('../data/bootstrapped')
agreement.comm <- read.delim("esMethods_AgreementIndex_1000Iterations_commercial.txt", head = TRUE)
agreement.comm$sampleSize <- as.factor(agreement.comm$sampleSize)

agreement.evAct <- read.delim("esMethods_AgreementIndex_1000Iterations_evAct.txt", head = TRUE)
agreement.evAct$sampleSize <- as.factor(agreement.evAct$sampleSize)

Fit Agreement Index values to growth and decay functions

##commercial-lab 
agreement.fit.coarse.comm <- functionFit(agreement.comm$agreementIndex[agreement.comm$grain == "c" & agreement.comm$testCond == 'sample'], agreement.comm$sampleSize[agreement.comm$grain == "c" & agreement.comm$testCond == 'sample'], sampleSize)
agreement.fit.fine.comm <- functionFit(agreement.comm$agreementIndex[agreement.comm$grain == "f" & agreement.comm$testCond == 'sample'], agreement.comm$sampleSize[agreement.comm$grain == "f" & agreement.comm$testCond == 'sample'], sampleSize)

agreement.functionFit <- agreement.fit.coarse.comm[[1]] %>% mutate(grain = "c")
agreement.functionFit <- rbind(agreement.functionFit, agreement.fit.fine.comm[[1]] %>% mutate(grain = "f"))
agreement.functionFit$grain <- as.factor(agreement.functionFit$grain)

for(i in 1:nrow(agreement.comm)){
  if(agreement.comm$grain[i] == 'c'){
    agreement.comm$elbow[i] = agreement.fit.coarse.comm[[3]][1]
  } else {
    agreement.comm$elbow[i] = agreement.fit.fine.comm[[3]][1]
  }
}

##everyday Activities-lab 
agreement.fit.coarse.evAct <- functionFit(agreement.evAct$agreementIndex[agreement.evAct$grain == "c" & agreement.evAct$testCond == "sample"], agreement.evAct$sampleSize[agreement.evAct$grain == 'c' & agreement.evAct$testCond == "sample"], sampleSize)
agreement.fit.fine.evAct <- functionFit(agreement.evAct$agreementIndex[agreement.evAct$grain == "f" & agreement.evAct$testCond == "sample"], agreement.evAct$sampleSize[agreement.evAct$grain == 'f' & agreement.evAct$testCond == "sample"], sampleSize)

agreement.evAct.functionFit <- agreement.fit.coarse.evAct[[1]] %>% mutate(grain = "c")
agreement.evAct.functionFit <- rbind(agreement.evAct.functionFit, agreement.fit.fine.evAct[[1]] %>% mutate(grain = "f"))
agreement.evAct.functionFit$grain <- as.factor(agreement.evAct.functionFit$grain)

for(i in 1:nrow(agreement.evAct)){
  if(agreement.evAct$grain[i] == 'c'){
    agreement.evAct$elbow[i] = agreement.fit.coarse.evAct[[3]][1]
  } else {
    agreement.evAct$elbow[i] = agreement.fit.fine.evAct[[3]][1]
  }
}

Plot Agreement Index over sample size

Agreement Index over increasing sample sizes for segmentation of: A) commercial-lab and B) everyday-online.

agreement.comm.coarse.plot <- plotIndivAgreement(agreement.comm[agreement.comm$grain == 'c',], "agreementIndex", "Agreement Index", -0.0000000000000001, 1, "Coarse")
agreement.comm.fine.plot <- plotIndivAgreement(agreement.comm[agreement.comm$grain == 'f',],"agreementIndex", "Agreement Index", -0.0000000000000001, 1, "Fine")

agreement.evAct.coarse.plot <- plotIndivAgreement(agreement.evAct[agreement.evAct$grain == 'c', ], "agreementIndex", "Agreement Index", -0.0000000000000001, 1, "")
agreement.evAct.fine.plot <- plotIndivAgreement(agreement.evAct[agreement.evAct$grain == 'f', ],  "agreementIndex", "Agreement Index", -0.0000000000000001, 1, "")

agreement.plot.combined <- ggarrange(agreement.comm.coarse.plot, agreement.comm.fine.plot,
                                      agreement.evAct.coarse.plot, agreement.evAct.fine.plot,
                                       nrow = 2, ncol = 2,
                                       labels = c("A","", "B", ""),
                                       common.legend = TRUE, legend = "bottom",
                                       align = "hv")

#agreement.plot.combined

setwd('../plots')
ggsave("esMethod_agreement.pdf", device = cairo_pdf, width = 25, height = 19, units = "cm", bg = "transparent")
ggsave(plot = agreement.plot.combined, file = "esMethod_agreement_whitebg.png", 
       type = "cairo-png",  bg = "white",
       width = 25, height = 19, units = "cm", dpi = 300)

Plot Agreement Index function fits

Agreement Index fit to various growth and decay functions over increasing sample sizes for segmentation of: A) commercial-lab and B) everyday-online.

agreement.comm.coarse.funcPlot <- plotFunctionFits(agreement.functionFit[agreement.functionFit$grain == 'c',], agreement.comm[agreement.comm$grain == 'c' & agreement.comm$testCond == 'sample',], "agreementIndex", "Agreement Index", -0.0000000000000001, 1)
agreement.comm.fine.funcPlot <- plotFunctionFits(agreement.functionFit[agreement.functionFit$grain == 'f',], agreement.comm[agreement.comm$grain == 'f' & agreement.comm$testCond == 'sample',], "agreementIndex", "Agreement Index", -0.0000000000000001, 1)

agreement.evAct.coarse.funcPlot <- plotFunctionFits(agreement.evAct.functionFit[agreement.evAct.functionFit$grain == 'c',], agreement.evAct[agreement.evAct$grain == 'c' & agreement.evAct$testCond == 'sample',], "agreementIndex", "Agreement Index", -0.0000000000000001, 1)
agreement.evAct.fine.funcPlot <- plotFunctionFits(agreement.evAct.functionFit[agreement.evAct.functionFit$grain == 'f',], agreement.evAct[agreement.evAct$grain == 'f' & agreement.evAct$testCond == 'sample',], "agreementIndex", "Agreement Index", -0.0000000000000001, 1)

agreement.funcFit.plot.combined <- ggarrange(agreement.comm.coarse.funcPlot, agreement.comm.fine.funcPlot,
                                      agreement.evAct.coarse.funcPlot, agreement.evAct.fine.funcPlot,
                                       nrow = 2, ncol = 2,
                                       labels = c("A","", "B", ""),
                                       common.legend = TRUE, legend = "bottom",
                                       align = "hv")

#agreement.funcFit.plot.combined

setwd('../plots')
ggsave("esMethod_agreement_functionFit.pdf", device = cairo_pdf, width = 25, height = 19, units = "cm", bg = "transparent")

Agreement Index linear models and analysis

#Commercial 
agreement.comm.lmer <- lmer(agreementIndex~sampleSize*grain*testCond + (1|movieName), data = agreement.comm)
emm_options(pbkrtest.limit = 10)
agreement.comm.poly <- summary(contrast(emmeans(agreement.comm.lmer,'sampleSize', by = c( 'grain', 'testCond')), 'poly', max.degree = 3), infer = TRUE)#Polynomial contrast
agreement.comm.rand_vs_samp <- summary(emmeans(agreement.comm.lmer, c("testCond"), by = "grain", contr = list(c1 = c(0, -1, 1))), infer = T)# overall random - sample 
agreement.comm.crossMov_vs_samp <- summary(emmeans(agreement.comm.lmer, c("testCond"), by = "grain", contr = list(c1 = c(-1, 0, 1))), infer = T)# overall crossMov - sample 
agreement.comm.crossMov_vs_rand <- summary(emmeans(agreement.comm.lmer, c("testCond"), by = "grain", contr = list(c1 = c(-1, 1, 0))), infer = T)# overall crossMov - random 

agreement.comm.pairwise <- summary(emmeans(agreement.comm.lmer, "testCond",by = c("sampleSize", "grain"), weights = "proportional", contr = "revpairwise", adjust = "none")$contrasts, infer = TRUE)#random vs sample pairwise/ sample size

agreement.evAct.lmer <- lmer(agreementIndex~sampleSize*grain*testCond + (1|movieName), data = agreement.evAct)
#emm_options(pbkrtest.limit = 38400)
agreement.evAct.poly <- summary(contrast(emmeans(agreement.evAct.lmer,'sampleSize', by = c( 'grain', 'testCond')), 'poly', max.degree = 3), infer = TRUE)#Polynomial contrast
agreement.evAct.rand_vs_samp <- summary(emmeans(agreement.evAct.lmer, c("testCond"), by = "grain", contr = list(c1 = c(0, -1, 1))), infer = T)# overall random - sample 
agreement.evAct.crossMov_vs_samp <- summary(emmeans(agreement.evAct.lmer, c("testCond"), by = "grain", contr = list(c1 = c(-1, 0, 1))), infer = T)# overall crossMov - sample 
agreement.evAct.crossMov_vs_rand <- summary(emmeans(agreement.evAct.lmer, c("testCond"), by = "grain", contr = list(c1 = c(-1, 1, 0))), infer = T)# overall crossMov - random 

agreement.evAct.pairwise <- summary(emmeans(agreement.evAct.lmer, "testCond",by = c("sampleSize", "grain"), weights = "proportional", contr = "revpairwise", adjust = "none")$contrasts, infer = TRUE)#random vs sample pairwise/ sample size

Agreement Index random - sample contrast commercial-lab

agreement.comm.rand_vs_samp$contrasts
## grain = c:
##  contrast  estimate           SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       0.1170318 0.0003396433 Inf 0.1163661 0.1176975 344.573  <.0001
## 
## grain = f:
##  contrast  estimate           SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       0.1026190 0.0003396433 Inf 0.1019533 0.1032846 302.137  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Agreement Index random - sample contrast everyday-online

agreement.evAct.rand_vs_samp$contrasts
## grain = c:
##  contrast  estimate           SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       0.1940208 0.0003334875 Inf 0.1933672 0.1946744 581.793  <.0001
## 
## grain = f:
##  contrast  estimate           SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       0.1608927 0.0003334875 Inf 0.1602391 0.1615464 482.455  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Agreement Index cross movie - sample contrast commercial-lab

agreement.comm.crossMov_vs_samp$contrasts
## grain = c:
##  contrast  estimate           SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       0.1874130 0.0003396433 Inf 0.1867473 0.1880787 551.794  <.0001
## 
## grain = f:
##  contrast  estimate           SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       0.1635853 0.0003396433 Inf 0.1629196 0.1642510 481.639  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Agreement Index cross movie - sample contrast everyday-online

agreement.evAct.crossMov_vs_samp$contrasts
## grain = c:
##  contrast  estimate           SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       0.2250822 0.0003334875 Inf 0.2244286 0.2257358 674.934  <.0001
## 
## grain = f:
##  contrast  estimate           SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       0.1882785 0.0003334875 Inf 0.1876249 0.1889322 564.575  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Agreement Index cross movie - random contrast commercial-lab

agreement.comm.crossMov_vs_rand$contrasts
## grain = c:
##  contrast   estimate           SE  df  asymp.LCL  asymp.UCL z.ratio p.value
##  c1       0.07038114 0.0003396433 Inf 0.06971546 0.07104683 207.221  <.0001
## 
## grain = f:
##  contrast   estimate           SE  df  asymp.LCL  asymp.UCL z.ratio p.value
##  c1       0.06096638 0.0003396433 Inf 0.06030069 0.06163207 179.501  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Agreement Index cross movie - random contrast everyday-online

agreement.evAct.crossMov_vs_rand$contrasts
## grain = c:
##  contrast   estimate           SE  df  asymp.LCL  asymp.UCL z.ratio p.value
##  c1       0.03106140 0.0003334875 Inf 0.03040777 0.03171502  93.141  <.0001
## 
## grain = f:
##  contrast   estimate           SE  df  asymp.LCL  asymp.UCL z.ratio p.value
##  c1       0.02738581 0.0003334875 Inf 0.02673219 0.02803944  82.119  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Agreement Index smallest sample size for significant pairwise effect commercial-lab

agreement.comm.pairwise %>% dplyr::filter(p.value < .05, z.ratio > 0) %>% dplyr::group_by(grain, contrast) %>% filter(sampleSize == min(as.numeric(as.character(sampleSize))))
## # A tibble: 6 x 10
## # Groups:   grain, contrast [6]
##   contrast   sampleSize grain estimate      SE    df asymp.LCL asymp.UCL z.ratio
##   <fct>      <fct>      <fct>    <dbl>   <dbl> <dbl>     <dbl>     <dbl>   <dbl>
## 1 sample - … 2          c       0.0380 0.00136   Inf    0.0353    0.0406    27.9
## 2 sample - … 2          c       0.0634 0.00136   Inf    0.0608    0.0661    46.7
## 3 random - … 6          c       0.0175 0.00136   Inf    0.0149    0.0202    12.9
## 4 sample - … 2          f       0.0203 0.00136   Inf    0.0177    0.0230    15.0
## 5 sample - … 2          f       0.0849 0.00136   Inf    0.0823    0.0876    62.5
## 6 random - … 6          f       0.0225 0.00136   Inf    0.0198    0.0252    16.6
## # … with 1 more variable: p.value <dbl>

Agreement Index smallest sample size for significant pairwise effect everyday-online

agreement.evAct.pairwise %>% dplyr::filter(p.value < .05, z.ratio > 0) %>% dplyr::group_by(grain, contrast) %>% filter(sampleSize == min(as.numeric(as.character(sampleSize))))
## # A tibble: 6 x 10
## # Groups:   grain, contrast [6]
##   contrast   sampleSize grain estimate      SE    df asymp.LCL asymp.UCL z.ratio
##   <fct>      <fct>      <fct>    <dbl>   <dbl> <dbl>     <dbl>     <dbl>   <dbl>
## 1 sample - … 2          c      0.0610  0.00133   Inf   0.0584    0.0636    45.7 
## 2 sample - … 2          c      0.102   0.00133   Inf   0.0994    0.105     76.5 
## 3 random - … 10         c      0.00559 0.00133   Inf   0.00298   0.00821    4.19
## 4 sample - … 2          f      0.0619  0.00133   Inf   0.0593    0.0646    46.4 
## 5 sample - … 2          f      0.124   0.00133   Inf   0.121     0.127     92.9 
## 6 random - … 6          f      0.0146  0.00133   Inf   0.0120    0.0173    11.0 
## # … with 1 more variable: p.value <dbl>

Agreement Index smallest pairwise effect commercial-lab

agreement.comm.pairwise %>% dplyr::filter(p.value < .05, z.ratio > 0) %>% dplyr::group_by(grain, contrast) %>% filter(z.ratio == min(z.ratio))
## # A tibble: 6 x 10
## # Groups:   grain, contrast [6]
##   contrast   sampleSize grain estimate      SE    df asymp.LCL asymp.UCL z.ratio
##   <fct>      <fct>      <fct>    <dbl>   <dbl> <dbl>     <dbl>     <dbl>   <dbl>
## 1 sample - … 2          c       0.0380 0.00136   Inf    0.0353    0.0406    27.9
## 2 sample - … 2          c       0.0634 0.00136   Inf    0.0608    0.0661    46.7
## 3 random - … 6          c       0.0175 0.00136   Inf    0.0149    0.0202    12.9
## 4 sample - … 2          f       0.0203 0.00136   Inf    0.0177    0.0230    15.0
## 5 sample - … 2          f       0.0849 0.00136   Inf    0.0823    0.0876    62.5
## 6 random - … 6          f       0.0225 0.00136   Inf    0.0198    0.0252    16.6
## # … with 1 more variable: p.value <dbl>

Agreement Index smallest pairwise effect everyday-online

agreement.evAct.pairwise %>% dplyr::filter(p.value < .05, z.ratio > 0) %>% dplyr::group_by(grain, contrast) %>% filter(z.ratio == min(z.ratio))
## # A tibble: 6 x 10
## # Groups:   grain, contrast [6]
##   contrast   sampleSize grain estimate      SE    df asymp.LCL asymp.UCL z.ratio
##   <fct>      <fct>      <fct>    <dbl>   <dbl> <dbl>     <dbl>     <dbl>   <dbl>
## 1 sample - … 2          c      0.0610  0.00133   Inf   0.0584    0.0636    45.7 
## 2 sample - … 2          c      0.102   0.00133   Inf   0.0994    0.105     76.5 
## 3 random - … 10         c      0.00559 0.00133   Inf   0.00298   0.00821    4.19
## 4 sample - … 2          f      0.0619  0.00133   Inf   0.0593    0.0646    46.4 
## 5 sample - … 2          f      0.124   0.00133   Inf   0.121     0.127     92.9 
## 6 random - … 6          f      0.0146  0.00133   Inf   0.0120    0.0173    11.0 
## # … with 1 more variable: p.value <dbl>

Agreement Index polynomial fit commercial-lab

agreement.comm.poly[agreement.comm.poly$testCond == "sample",]
##     contrast grain testCond  estimate         SE  df  asymp.LCL  asymp.UCL
## 13    linear     c   sample   9.99004 0.03542725 Inf   9.920604  10.059476
## 14 quadratic     c   sample  -9.66305 0.07260427 Inf  -9.805351  -9.520748
## 15     cubic     c   sample  63.15403 0.96437644 Inf  61.263882  65.044168
## 16    linear     f   sample  10.16639 0.03542725 Inf  10.096956  10.235829
## 17 quadratic     f   sample -11.97331 0.07260427 Inf -12.115608 -11.831004
## 18     cubic     f   sample  84.86902 0.96437644 Inf  82.978878  86.759164
##       z.ratio p.value
## 13  281.98743       0
## 14 -133.09204       0
## 15   65.48690       0
## 16  286.96530       0
## 17 -164.91188       0
## 18   88.00404       0

Agreement Index polynomial fit everyday-online

agreement.evAct.poly[agreement.evAct.poly$testCond == "sample",]
##     contrast grain testCond   estimate         SE  df  asymp.LCL  asymp.UCL
## 13    linear     c   sample  11.100443 0.03478516 Inf  11.032265  11.168620
## 14 quadratic     c   sample -10.234097 0.07128836 Inf -10.373819 -10.094374
## 15     cubic     c   sample  67.106357 0.94689780 Inf  65.250471  68.962242
## 16    linear     f   sample   7.987863 0.03478516 Inf   7.919686   8.056041
## 17 quadratic     f   sample  -9.892437 0.07128836 Inf -10.032160  -9.752714
## 18     cubic     f   sample  78.893767 0.94689780 Inf  77.037882  80.749653
##       z.ratio p.value
## 13  319.11432       0
## 14 -143.55914       0
## 15   70.86969       0
## 16  229.63423       0
## 17 -138.76650       0
## 18   83.31814       0

Surprise Index

Load dataset

setwd('../data/bootstrapped')

surprise.comm <- read.delim("esMethods_SurpriseIndex_c0.1_f0.05_1000Iterations_commercial.txt", head = TRUE)
surprise.comm$sampleSize <- as.factor(surprise.comm$sampleSize)
surprise.comm$logSurprise <- log(surprise.comm$surpriseIndex, base = 10)

surprise.evAct <- read.delim("esMethods_SurpriseIndex_c0.1_f0.05_1000Iterations_evAct.txt", head = TRUE)
surprise.evAct$sampleSize <- as.factor(surprise.evAct$sampleSize)
surprise.evAct$logSurprise <- log(surprise.evAct$surpriseIndex, base = 10)

Fit Surprise Index values to growth and decay functions

##commercial 
surprise.fit.coarse.comm <- functionFit(surprise.comm$surpriseIndex[surprise.comm$grain == "c" & surprise.comm$testCond == 'sample'], surprise.comm$sampleSize[surprise.comm$grain == "c" & surprise.comm$testCond == 'sample'], sampleSize)
surprise.fit.fine.comm <- functionFit(surprise.comm$surpriseIndex[surprise.comm$grain == "f" & surprise.comm$testCond == 'sample'], surprise.comm$sampleSize[surprise.comm$grain == "f" & surprise.comm$testCond == 'sample'], sampleSize)

surprise.functionFit <- surprise.fit.coarse.comm[[1]] %>% mutate(grain = "c")
surprise.functionFit <- rbind(surprise.functionFit, surprise.fit.fine.comm[[1]] %>% mutate(grain = "f"))
surprise.functionFit$grain <- as.factor(surprise.functionFit$grain)

#surprise.functionFit.comm.figure <- plotFunctionFits(surprise.functionFit, surprise.comm[surprise.comm$testCond == "sample",], "surpriseIndex", "Surprise Index (raw)")

for(i in 1:nrow(surprise.comm)){
  if(surprise.comm$grain[i] == 'c'){
    surprise.comm$elbow[i] = surprise.fit.coarse.comm[[3]][1]
  } else {
    surprise.comm$elbow[i] = surprise.fit.fine.comm[[3]][1]
  }
}

##everyday Activities 
surprise.fit.coarse.evAct <- functionFit(surprise.evAct$surpriseIndex[surprise.evAct$grain == "c" & surprise.evAct$testCond == "sample"], surprise.evAct$sampleSize[surprise.evAct$grain == 'c' & surprise.evAct$testCond == "sample"], sampleSize)
surprise.fit.fine.evAct <- functionFit(surprise.evAct$surpriseIndex[surprise.evAct$grain == "f" & surprise.evAct$testCond == "sample"], surprise.evAct$sampleSize[surprise.evAct$grain == 'f' & surprise.evAct$testCond == "sample"], sampleSize)

surprise.evAct.functionFit <- surprise.fit.coarse.evAct[[1]] %>% mutate(grain = "c")
surprise.evAct.functionFit <- rbind(surprise.evAct.functionFit, surprise.fit.fine.evAct[[1]] %>% mutate(grain = "f"))
surprise.evAct.functionFit$grain <- as.factor(surprise.evAct.functionFit$grain)

#surprise.functionFit.evAct.figure <- plotFunctionFits(surprise.evAct.functionFit, surprise.evAct[surprise.evAct$testCond == "sample",], "surpriseIndex", "Surprise Index (raw)") + ylim(0,15)

for(i in 1:nrow(surprise.evAct)){
  if(surprise.evAct$grain[i] == 'c'){
    surprise.evAct$elbow[i] = surprise.fit.coarse.evAct[[3]][1]
  } else {
    surprise.evAct$elbow[i] = surprise.fit.fine.evAct[[3]][1]
  }
}

Plot Surprise Index over sample size

Log10 transformed peak-to-peak distance over increasing sample sizes for segmentation of: A) commercial-lab and B) everyday-online.

surprise.comm.coarse.plot <- plotIndivAgreement(surprise.comm[surprise.comm$grain == 'c',], "surpriseIndex", "Surprise Index ", -2, 15, "Coarse")
surprise.comm.fine.plot <- plotIndivAgreement(surprise.comm[surprise.comm$grain == 'f',], "surpriseIndex", "Surprise Index ", -2, 15, "Fine")

surprise.evAct.coarse.plot <- plotIndivAgreement(surprise.evAct[surprise.evAct$grain == 'c', ],  "surpriseIndex", "Surprise Index", -2,15, "")
surprise.evAct.fine.plot <- plotIndivAgreement(surprise.evAct[surprise.evAct$grain == 'f', ],  "surpriseIndex", "Surprise Index", -2, 15, "")

surprise.plot.combined <- ggarrange(surprise.comm.coarse.plot, surprise.comm.fine.plot,
                                      surprise.evAct.coarse.plot, surprise.evAct.fine.plot,
                                       nrow = 2, ncol = 2,
                                       labels = c("A","", "B", ""),
                                       common.legend = TRUE, legend = "bottom",
                                       align = "hv")

#surprise.plot.combined

setwd('../plots')
ggsave("esMethod_surprise.pdf", device = cairo_pdf, width = 25, height = 19, units = "cm", bg = "transparent", dpi = 300)
ggsave(plot = surprise.plot.combined, file = "esMethod_surpriseIndex_whitebg.png", 
       type = "cairo-png",  bg = "white",
       width = 25, height = 19, units = "cm", dpi = 300)

Plot Surprise Index function fits

Peak-to-peak values fit to various growth and decay functions over increasing sample sizes for segmentation of: A) commercial-lab and B) everyday-online.

surprise.comm.coarse.funcPlot <- plotFunctionFits(surprise.functionFit[surprise.functionFit$grain == 'c',], surprise.comm[surprise.comm$grain == 'c' & surprise.comm$testCond == 'sample',], "surpriseIndex", "Surprise Index", 0, 15)
surprise.comm.fine.funcPlot <- plotFunctionFits(surprise.functionFit[surprise.functionFit$grain == 'f',], surprise.comm[surprise.comm$grain == 'f' & surprise.comm$testCond == 'sample',], "surpriseIndex", "Surprise Index", 0, 15)

surprise.evAct.coarse.funcPlot <- plotFunctionFits(surprise.evAct.functionFit[surprise.evAct.functionFit$grain == 'c',], surprise.evAct[surprise.evAct$grain == 'c' & surprise.evAct$testCond == 'sample',], "surpriseIndex", "Surprise Index", 0, 15)
surprise.evAct.fine.funcPlot <- plotFunctionFits(surprise.evAct.functionFit[surprise.evAct.functionFit$grain == 'f',], surprise.evAct[surprise.evAct$grain == 'f' & surprise.evAct$testCond == 'sample',], "surpriseIndex", "Surprise Index", 0, 15)

surprise.funcFit.plot.combined <- ggarrange(surprise.comm.coarse.funcPlot, surprise.comm.fine.funcPlot,
                                      surprise.evAct.coarse.funcPlot, surprise.evAct.fine.funcPlot,
                                       nrow = 2, ncol = 2,
                                       labels = c("A","", "B", ""),
                                       common.legend = TRUE, legend = "bottom",
                                       align = "hv")

surprise.funcFit.plot.combined

setwd('../plots')
ggsave("esMethod_surprise_functionFit.pdf", device = cairo_pdf, width = 25, height = 19, units = "cm", bg = "transparent")

Surprise Index linear models and analysis

#Commercial 
surprise.comm.lmer <- lmer(surpriseIndex~sampleSize*grain*testCond + (1|movieName), data = surprise.comm)
#emm_options(lmerTest.limit = 19200)
surprise.comm.poly <- summary(contrast(emmeans(surprise.comm.lmer,'sampleSize', by = c( 'grain', 'testCond')), 'poly', max.degree = 3), infer = TRUE)#Polynomial contrast
surprise.comm.rand_vs_samp <- summary(emmeans(surprise.comm.lmer, c("testCond"), by = "grain", contr = list(c1 = c(0, -1, 1))), infer = T)# overall random - sample 
surprise.comm.crossMov_vs_samp <- summary(emmeans(surprise.comm.lmer, c("testCond"), by = "grain", contr = list(c1 = c(-1, 0, 1))), infer = T)# overall crossMov - sample 
surprise.comm.crossMov_vs_rand <- summary(emmeans(surprise.comm.lmer, c("testCond"), by = "grain", contr = list(c1 = c(-1, 1, 0))), infer = T)# overall crossMov - random 

surprise.comm.pairwise <- summary(emmeans(surprise.comm.lmer, "testCond",by = c("sampleSize", "grain"), weights = "proportional", contr = "revpairwise", adjust = "none")$contrasts, infer = TRUE)#random vs sample pairwise/ sample size

surprise.evAct.lmer <- lmer(surpriseIndex~sampleSize*grain*testCond + (1|movieName), data = surprise.evAct)
#emm_options(lmerTest.limit = 38400)
surprise.evAct.poly <- summary(contrast(emmeans(surprise.evAct.lmer,'sampleSize', by = c( 'grain', 'testCond')), 'poly', max.degree = 3), infer = TRUE)#Polynomial contrast
surprise.evAct.rand_vs_samp <- summary(emmeans(surprise.evAct.lmer, c("testCond"), by = "grain", contr = list(c1 = c(0, -1, 1))), infer = T)# overall random - sample 
surprise.evAct.crossMov_vs_samp <- summary(emmeans(surprise.evAct.lmer, c("testCond"), by = "grain", contr = list(c1 = c(-1, 0, 1))), infer = T)# overall crossMov - sample 
surprise.evAct.crossMov_vs_rand <- summary(emmeans(surprise.evAct.lmer, c("testCond"), by = "grain", contr = list(c1 = c(1, -1, 0))), infer = T)# overall crossMov - random

surprise.evAct.pairwise <- summary(emmeans(surprise.evAct.lmer, "testCond",by = c("sampleSize", "grain"), weights = "proportional", contr = "revpairwise", adjust = "none")$contrasts, infer = TRUE)#random vs sample pairwise/ sample size

Surprise Index random - sample contrast commercial-lab

surprise.comm.rand_vs_samp$contrasts
## grain = c:
##  contrast estimate          SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       2.319886 0.008740593 Inf  2.302755  2.337017 265.415  <.0001
## 
## grain = f:
##  contrast estimate          SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       5.885151 0.008740593 Inf  5.868020  5.902283 673.313  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Surprise Index random - sample contrast everyday-online

surprise.evAct.rand_vs_samp$contrasts
## grain = c:
##  contrast estimate          SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       2.331729 0.007496413 Inf  2.317037  2.346422 311.046  <.0001
## 
## grain = f:
##  contrast estimate          SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       4.990986 0.007496413 Inf  4.976293  5.005678 665.783  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Surprise Index cross movie - sample contrast commercial-lab

surprise.comm.crossMov_vs_samp$contrasts
## grain = c:
##  contrast estimate          SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       2.329437 0.008740593 Inf  2.312306  2.346568 266.508  <.0001
## 
## grain = f:
##  contrast estimate          SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       5.860747 0.008740593 Inf  5.843616  5.877879 670.521  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Surprise Index cross movie - sample contrast everyday-online

surprise.evAct.crossMov_vs_samp$contrasts
## grain = c:
##  contrast estimate          SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       1.959632 0.007496413 Inf  1.944939  1.974324 261.409  <.0001
## 
## grain = f:
##  contrast estimate          SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       5.015791 0.007496413 Inf  5.001098  5.030483 669.092  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Surprise Index cross movie - random contrast commercial-lab

surprise.comm.crossMov_vs_rand$contrasts
## grain = c:
##  contrast     estimate          SE  df    asymp.LCL    asymp.UCL z.ratio
##  c1        0.009551105 0.008740593 Inf -0.007580143  0.026682352   1.093
##  p.value
##   0.2745
## 
## grain = f:
##  contrast     estimate          SE  df    asymp.LCL    asymp.UCL z.ratio
##  c1       -0.024403891 0.008740593 Inf -0.041535138 -0.007272643  -2.792
##  p.value
##   0.0052
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Surprise Index cross movie - random contrast everyday-online

surprise.evAct.crossMov_vs_rand$contrasts
## grain = c:
##  contrast    estimate          SE  df   asymp.LCL   asymp.UCL z.ratio p.value
##  c1        0.37209749 0.007496413 Inf  0.35740479  0.38679019  49.637  <.0001
## 
## grain = f:
##  contrast    estimate          SE  df   asymp.LCL   asymp.UCL z.ratio p.value
##  c1       -0.02480479 0.007496413 Inf -0.03949749 -0.01011209  -3.309  0.0009
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Surprise Index smallest sample size for significant pairwise effect commercial-lab

surprise.comm.pairwise %>% dplyr::filter(p.value < .05, z.ratio > 0) %>% dplyr::group_by(grain, contrast) %>% filter(sampleSize == min(as.numeric(as.character(sampleSize))))
## # A tibble: 4 x 10
## # Groups:   grain, contrast [4]
##   contrast    sampleSize grain estimate     SE    df asymp.LCL asymp.UCL z.ratio
##   <fct>       <fct>      <fct>    <dbl>  <dbl> <dbl>     <dbl>     <dbl>   <dbl>
## 1 sample - c… 2          c        0.603 0.0350   Inf     0.535     0.672    17.3
## 2 sample - r… 2          c        0.616 0.0350   Inf     0.547     0.684    17.6
## 3 sample - c… 2          f        1.15  0.0350   Inf     1.08      1.22     33.0
## 4 sample - r… 2          f        1.13  0.0350   Inf     1.07      1.20     32.4
## # … with 1 more variable: p.value <dbl>

Surprise Index smallest sample size for significant pairwise effect everyday-online

surprise.evAct.pairwise %>% dplyr::filter(p.value < .05, z.ratio > 0) %>% dplyr::group_by(grain, contrast) %>% filter(sampleSize == min(as.numeric(as.character(sampleSize))))
## # A tibble: 4 x 10
## # Groups:   grain, contrast [4]
##   contrast    sampleSize grain estimate     SE    df asymp.LCL asymp.UCL z.ratio
##   <fct>       <fct>      <fct>    <dbl>  <dbl> <dbl>     <dbl>     <dbl>   <dbl>
## 1 sample - c… 2          c        0.915 0.0300   Inf     0.857     0.974    30.5
## 2 sample - r… 2          c        1.23  0.0300   Inf     1.17      1.29     41.1
## 3 sample - c… 2          f        1.82  0.0300   Inf     1.76      1.88     60.7
## 4 sample - r… 2          f        1.89  0.0300   Inf     1.83      1.95     63.1
## # … with 1 more variable: p.value <dbl>

Surprise Index smallest pairwise effect commercial-lab

surprise.comm.pairwise %>% dplyr::filter(p.value < .05, z.ratio > 0) %>% dplyr::group_by(grain, contrast) %>% filter(z.ratio == min(z.ratio))
## # A tibble: 4 x 10
## # Groups:   grain, contrast [4]
##   contrast    sampleSize grain estimate     SE    df asymp.LCL asymp.UCL z.ratio
##   <fct>       <fct>      <fct>    <dbl>  <dbl> <dbl>     <dbl>     <dbl>   <dbl>
## 1 sample - c… 2          c        0.603 0.0350   Inf     0.535     0.672    17.3
## 2 sample - r… 2          c        0.616 0.0350   Inf     0.547     0.684    17.6
## 3 sample - c… 2          f        1.15  0.0350   Inf     1.08      1.22     33.0
## 4 sample - r… 2          f        1.13  0.0350   Inf     1.07      1.20     32.4
## # … with 1 more variable: p.value <dbl>

Surprise Index smallest pairwise effect everyday-online

surprise.evAct.pairwise %>% dplyr::filter(p.value < .05, z.ratio > 0) %>% dplyr::group_by(grain, contrast) %>% filter(z.ratio == min(z.ratio))
## # A tibble: 4 x 10
## # Groups:   grain, contrast [4]
##   contrast    sampleSize grain estimate     SE    df asymp.LCL asymp.UCL z.ratio
##   <fct>       <fct>      <fct>    <dbl>  <dbl> <dbl>     <dbl>     <dbl>   <dbl>
## 1 sample - c… 4          c        0.833 0.0300   Inf     0.774     0.892    27.8
## 2 sample - r… 4          c        1.09  0.0300   Inf     1.03      1.15     36.4
## 3 sample - c… 2          f        1.82  0.0300   Inf     1.76      1.88     60.7
## 4 sample - r… 2          f        1.89  0.0300   Inf     1.83      1.95     63.1
## # … with 1 more variable: p.value <dbl>

Surprise Index polynomial fit commercial-lab

surprise.comm.poly[surprise.comm.poly$testCond == "sample",]
##     contrast grain testCond   estimate         SE  df  asymp.LCL  asymp.UCL
## 13    linear     c   sample  135.11608  0.9117071 Inf  133.32916  136.90299
## 14 quadratic     c   sample  -57.02735  1.8684436 Inf  -60.68943  -53.36527
## 15     cubic     c   sample   65.95959 24.8178667 Inf   17.31746  114.60171
## 16    linear     f   sample  310.28610  0.9117071 Inf  308.49918  312.07301
## 17 quadratic     f   sample -182.32969  1.8684436 Inf -185.99177 -178.66761
## 18     cubic     f   sample  384.12832 24.8178667 Inf  335.48619  432.77044
##       z.ratio       p.value
## 13 148.201188  0.000000e+00
## 14 -30.521311 1.359149e-204
## 15   2.657746  7.866513e-03
## 16 340.335281  0.000000e+00
## 17 -97.583729  0.000000e+00
## 18  15.477894  4.892249e-54

Surprise Index polynomial fit everyday-online

surprise.evAct.poly[surprise.evAct.poly$testCond == "sample",]
##     contrast grain testCond   estimate         SE  df  asymp.LCL  asymp.UCL
## 13    linear     c   sample  101.60825  0.7819301 Inf  100.07570  103.14081
## 14 quadratic     c   sample  -36.78383  1.6024798 Inf  -39.92464  -33.64303
## 15     cubic     c   sample  -79.01683 21.2851655 Inf -120.73499  -37.29867
## 16    linear     f   sample  172.24429  0.7819301 Inf  170.71174  173.77685
## 17 quadratic     f   sample -135.94442  1.6024798 Inf -139.08522 -132.80362
## 18     cubic     f   sample  699.54421 21.2851655 Inf  657.82606  741.26237
##       z.ratio       p.value
## 13 129.945443  0.000000e+00
## 14 -22.954320 1.334133e-116
## 15  -3.712296  2.053877e-04
## 16 220.280934  0.000000e+00
## 17 -84.833780  0.000000e+00
## 18  32.865340 6.877330e-237

Predictive value and detection accuracy

Load dataset

setwd('../data/bootstrapped')
normativeHit.comm <- read.delim("esMethods_normativeHitRate_bin_1000Iterations_commercial.txt", head = TRUE)

predictive.value<- normativeHit.comm %>% dplyr::select(Iteration, sampleSize, movieName, grain, testCond, normativePPV, normativeFalsePPV)
predictive.value$dPrime <- qnorm(predictive.value$normativePPV) - qnorm(predictive.value$normativeFalsePPV)
predictive.value <- predictive.value %>% dplyr::filter(is.finite(predictive.value$dPrime))
predictive.value$sampleSize <- as.factor(predictive.value$sampleSize)

detection.sensitivity <- normativeHit.comm %>% dplyr::select(Iteration, sampleSize, movieName, grain, testCond, normativeHit, normativeFA, normativeMiss, normativeCR)
detection.sensitivity$dPrime <- qnorm(detection.sensitivity$normativeHit) - qnorm(detection.sensitivity$normativeFA)
detection.sensitivity <- detection.sensitivity %>% dplyr::filter(is.finite(detection.sensitivity$dPrime))
detection.sensitivity$sampleSize <- as.factor(detection.sensitivity$sampleSize)

Fit predictive value and detection accuracy values to growth and decay functions

#zalladPrime
predictive.fit.coarse <- functionFit(predictive.value$dPrime[predictive.value$grain == "c" & predictive.value$testCond == 'sample'], predictive.value$sampleSize[predictive.value$grain == "c" & predictive.value$testCond == 'sample'], sampleSize)
predictive.fit.coarse <- functionFit(predictive.value$dPrime[predictive.value$grain == "f" & predictive.value$testCond == 'sample'], predictive.value$sampleSize[predictive.value$grain == "f" & predictive.value$testCond == 'sample'], sampleSize)

predictive.functionFit <- predictive.fit.coarse[[1]] %>% mutate(grain = "c")
predictive.functionFit <- rbind(predictive.functionFit, predictive.fit.coarse[[1]] %>% mutate(grain = "f"))
predictive.functionFit$grain <- as.factor(predictive.functionFit$grain)

for(i in 1:nrow(predictive.value)){
  if(predictive.value$grain[i] == 'c'){
    predictive.value$elbow[i] = predictive.fit.coarse[[3]][1]
  } else {
    predictive.value$elbow[i] = predictive.fit.coarse[[3]][1]
  }
}


##commercial 
detection.sensitivity.fit.coarse.comm <- functionFit(detection.sensitivity$dPrime[detection.sensitivity$grain == "c" & detection.sensitivity$testCond == 'sample'], detection.sensitivity$sampleSize[detection.sensitivity$grain == "c" & detection.sensitivity$testCond == 'sample'], sampleSize)
detection.sensitivity.fit.fine.comm <- functionFit(detection.sensitivity$dPrime[detection.sensitivity$grain == "f" & detection.sensitivity$testCond == 'sample'], detection.sensitivity$sampleSize[detection.sensitivity$grain == "f" & detection.sensitivity$testCond == 'sample'], sampleSize)

detection.sensitivity.functionFit <- detection.sensitivity.fit.coarse.comm[[1]] %>% mutate(grain = "c")
detection.sensitivity.functionFit <- rbind(detection.sensitivity.functionFit, detection.sensitivity.fit.fine.comm[[1]] %>% mutate(grain = "f"))
detection.sensitivity.functionFit$grain <- as.factor(detection.sensitivity.functionFit$grain)


for(i in 1:nrow(detection.sensitivity)){
  if(detection.sensitivity$grain[i] == 'c'){
    detection.sensitivity$elbow[i] = detection.sensitivity.fit.coarse.comm[[3]][1]
  } else {
    detection.sensitivity$elbow[i] = detection.sensitivity.fit.fine.comm[[3]][1]
  }
}

Plot predictive value and detection accuracy over sample size

Predictive value and detection accuracy over increasing sample sizes for segmentation of: A) commercial-lab and B) everyday-online.

predictive.value.comm.coarse.plot <- plotIndivAgreement(predictive.value[predictive.value$grain == 'c',], "dPrime", "Predictive value", -6, 1, "Coarse")
predictive.value.comm.fine.plot <- plotIndivAgreement(predictive.value[predictive.value$grain == 'f',],"dPrime", "Predictive value", -6, 1, "Fine")

predictive.value.plot <- ggarrange(predictive.value.comm.coarse.plot, predictive.value.comm.fine.plot,
                                       nrow = 1, ncol = 2,
                                       common.legend = TRUE, legend = "bottom",
                                       align = "hv")

detection.sensitivity.coarse.plot <- plotIndivAgreement(detection.sensitivity[detection.sensitivity$grain == 'c',], "dPrime", "Detection accuracy", -1, 2, "Coarse")
detection.sensitivity.fine.plot <- plotIndivAgreement(detection.sensitivity[detection.sensitivity$grain == 'f',], "dPrime", "Detection accuracy", -1, 2, "Fine")

detection.sensitivity.plot <- ggarrange(detection.sensitivity.coarse.plot, detection.sensitivity.fine.plot,
                                       nrow = 1, ncol = 2,
                                       common.legend = TRUE, legend = "bottom",
                                       align = "hv")

predictive.detection.accuracy <- ggarrange(predictive.value.plot, detection.sensitivity.plot,
                                  nrow = 2, ncol = 1,
                                  common.legend = TRUE, legend = "bottom",
                                  labels = c("A","B"),
                                  align = "hv")

#predictive.detection.accuracy

setwd('../plots')
ggsave("esMethod_predictive&detection.pdf", device = cairo_pdf, width = 25, height = 19, units = "cm", bg = "transparent")
ggsave(plot = predictive.detection.accuracy, file = "esMethod_predictive&detection.png", 
       type = "cairo-png",  bg = "white",
       width = 25, height = 19, units = "cm", dpi = 300)

Plot predictive value and detection accuracy function fits

Predictive value and detection accuracy fit to various growth and decay functions over increasing sample sizes for segmentation of: A) commercial-lab and B) everyday-online.

predictive.coarse.funcPlot <- plotFunctionFits(predictive.functionFit[predictive.functionFit$grain == 'c',], predictive.value[predictive.value$grain == 'c' & predictive.value$testCond == 'sample',], "dPrime", "Predictive value", -4, 0)
predictive.fine.funcPlot <- plotFunctionFits(predictive.functionFit[predictive.functionFit$grain == 'f',], predictive.value[predictive.value$grain == 'f' & predictive.value$testCond == 'sample',], "dPrime", "Predictive value", -4, 0)

detection.coarse.funcPlot <- plotFunctionFits(detection.sensitivity.functionFit[detection.sensitivity.functionFit$grain == 'c',], detection.sensitivity[detection.sensitivity$grain == 'c' & detection.sensitivity$testCond == 'sample',], "dPrime", "Detection accuracy", -1, 2)
detection.fine.funcPlot <- plotFunctionFits(detection.sensitivity.functionFit[detection.sensitivity.functionFit$grain == 'f',], detection.sensitivity[detection.sensitivity$grain == 'f' & detection.sensitivity$testCond == 'sample',], "dPrime", "Detection accuracy", -1, 2)


predictive.detection.funcFit.plot.combined <- ggarrange(predictive.coarse.funcPlot, predictive.fine.funcPlot,
                                      detection.coarse.funcPlot, detection.fine.funcPlot,
                                       nrow = 2, ncol = 2,
                                       labels = c("A","", "B", ""),
                                       common.legend = TRUE, legend = "bottom",
                                       align = "hv")

predictive.detection.funcFit.plot.combined

setwd('../plots')
ggsave("esMethod_predictive&detection_functionFit.pdf", device = cairo_pdf, width = 25, height = 19, units = "cm", bg = "transparent")

Predictive value and detection accuracy linear models and analysis

#Predictive value  
predictive.value.lmer <- lmer(dPrime~sampleSize*grain*testCond + (1|movieName), data = predictive.value)
#emm_options(lmerTest.limit = 19029)

predictive.rand_vs_samp <- summary(emmeans(predictive.value.lmer, c("testCond"), by = "grain", contr = list(c1 = c(0, -1, 1))), infer = T)# overall random - sample 
predictive.crossMov_vs_samp <- summary(emmeans(predictive.value.lmer, c("testCond"), by = "grain", contr = list(c1 = c(-1, 0, 1))), infer = T)# overall crossMov - sample 
predictive.crossMov_vs_rand <- summary(emmeans(predictive.value.lmer, c("testCond"), by = "grain", contr = list(c1 = c(1, -1, 0))), infer = T)# overall crossMov - random 

predictive.poly <- summary(contrast(emmeans(predictive.value.lmer,'sampleSize', by = c( 'grain', 'testCond')), 'poly', max.degree = 3), infer = TRUE)#Polynomial contrast

predictive.pairwise <- summary(emmeans(predictive.value.lmer, "testCond",by = c("sampleSize", "grain"), weights = "proportional", contr = "revpairwise", adjust = "none")$contrasts, infer = TRUE)#random vs sample pairwise/ sample size

#Detection sensitivity  
detection.sensitivity.lmer <- lmer(dPrime~sampleSize*grain*testCond + (1|movieName), data = detection.sensitivity)
#emm_options(lmerTest.limit = 19029)
detection.poly <- summary(contrast(emmeans(detection.sensitivity.lmer,'sampleSize', by = c( 'grain', 'testCond')), 'poly', max.degree = 3), infer = TRUE)#Polynomial contrast
detection.rand_vs_samp <- summary(emmeans(detection.sensitivity.lmer, c("testCond"), by = "grain", contr = list(c1 = c(0, -1, 1))), infer = T)# overall random - sample 
detection.crossMov_vs_samp <- summary(emmeans(detection.sensitivity.lmer, c("testCond"), by = "grain", contr = list(c1 = c(-1, 0, 1))), infer = T)# overall crossMov - sample 
detection.crossMov_vs_rand <- summary(emmeans(detection.sensitivity.lmer, c("testCond"), by = "grain", contr = list(c1 = c(1, -1, 0))), infer = T)# overall crossMov - random 

detection.pairwise <- summary(emmeans(detection.sensitivity.lmer, "testCond",by = c("sampleSize", "grain"), weights = "proportional", contr = "revpairwise", adjust = "none")$contrasts, infer = TRUE)#random vs sample pairwise/ sample size

Predictive value random - sample contrast commercial-lab

predictive.rand_vs_samp$contrasts
## grain = c:
##  contrast estimate          SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       1.606279 0.002911571 Inf  1.600572  1.611985 551.688  <.0001
## 
## grain = f:
##  contrast estimate          SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       1.128609 0.002870916 Inf  1.122982  1.134236 393.118  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Detection value random - sample contrast commercial-lab

detection.rand_vs_samp$contrasts
## grain = c:
##  contrast  estimate          SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       0.8238194 0.001387587 Inf 0.8210998 0.8265390 593.706  <.0001
## 
## grain = f:
##  contrast  estimate          SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       0.6593842 0.001368212 Inf 0.6567026 0.6620659 481.931  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Predictive value cross movie - sample contrast commercial-lab

predictive.crossMov_vs_samp$contrasts
## grain = c:
##  contrast estimate          SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       1.494641 0.002901300 Inf  1.488954  1.500327 515.162  <.0001
## 
## grain = f:
##  contrast estimate          SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       1.142169 0.002870826 Inf  1.136542  1.147796 397.854  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Detection value cross movie - sample contrast commercial-lab

detection.crossMov_vs_samp$contrasts
## grain = c:
##  contrast  estimate          SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       0.7838627 0.001382692 Inf 0.7811527 0.7865728 566.910  <.0001
## 
## grain = f:
##  contrast  estimate          SE  df asymp.LCL asymp.UCL z.ratio p.value
##  c1       0.6705385 0.001368169 Inf 0.6678569 0.6732200 490.099  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Predictive value cross movie - random contrast commercial-lab

predictive.crossMov_vs_rand$contrasts
## grain = c:
##  contrast    estimate          SE  df   asymp.LCL    asymp.UCL z.ratio p.value
##  c1        0.11163813 0.002906696 Inf  0.10594111  0.117335152  38.407  <.0001
## 
## grain = f:
##  contrast    estimate          SE  df   asymp.LCL    asymp.UCL z.ratio p.value
##  c1       -0.01355972 0.002869577 Inf -0.01918398 -0.007935448  -4.725  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Detection value cross movie - random contrast commercial-lab

detection.crossMov_vs_rand$contrasts
## grain = c:
##  contrast    estimate          SE  df   asymp.LCL    asymp.UCL z.ratio p.value
##  c1        0.03995666 0.001385264 Inf  0.03724159  0.042671723  28.844  <.0001
## 
## grain = f:
##  contrast    estimate          SE  df   asymp.LCL    asymp.UCL z.ratio p.value
##  c1       -0.01115425 0.001367574 Inf -0.01383465 -0.008473855  -8.156  <.0001
## 
## Results are averaged over the levels of: sampleSize 
## Confidence level used: 0.95

Predictive value smallest sample size for significant pairwise effect commercial-lab

predictive.pairwise %>% dplyr::filter(p.value < .05, z.ratio > 0) %>% dplyr::group_by(grain, contrast) %>% filter(sampleSize == min(as.numeric(as.character(sampleSize))))
## # A tibble: 6 x 10
## # Groups:   grain, contrast [6]
##   contrast    sampleSize grain estimate     SE    df asymp.LCL asymp.UCL z.ratio
##   <fct>       <fct>      <fct>    <dbl>  <dbl> <dbl>     <dbl>     <dbl>   <dbl>
## 1 sample - c… 2          c       0.701  0.0130   Inf   0.676      0.727    54.0 
## 2 sample - r… 2          c       0.809  0.0135   Inf   0.782      0.835    59.7 
## 3 random - c… 4          c       0.0778 0.0119   Inf   0.0544     0.101     6.51
## 4 sample - c… 2          f       0.439  0.0116   Inf   0.416      0.461    37.9 
## 5 sample - r… 2          f       0.513  0.0116   Inf   0.490      0.535    44.3 
## 6 random - c… 4          f       0.0243 0.0115   Inf   0.00183    0.0468    2.12
## # … with 1 more variable: p.value <dbl>

Detection value smallest sample size for significant pairwise effect commercial-lab

detection.pairwise %>% dplyr::filter(p.value < .05, z.ratio > 0) %>% dplyr::group_by(grain, contrast) %>% filter(sampleSize == min(as.numeric(as.character(sampleSize))))
## # A tibble: 6 x 10
## # Groups:   grain, contrast [6]
##   contrast   sampleSize grain estimate      SE    df asymp.LCL asymp.UCL z.ratio
##   <fct>      <fct>      <fct>    <dbl>   <dbl> <dbl>     <dbl>     <dbl>   <dbl>
## 1 sample - … 2          c       0.340  0.00619   Inf   0.328      0.352    54.8 
## 2 sample - … 2          c       0.453  0.00645   Inf   0.440      0.465    70.2 
## 3 random - … 4          c       0.0226 0.00569   Inf   0.0114     0.0337    3.97
## 4 sample - … 2          f       0.252  0.00551   Inf   0.242      0.263    45.8 
## 5 sample - … 2          f       0.298  0.00551   Inf   0.287      0.309    54.1 
## 6 random - … 4          f       0.0146 0.00547   Inf   0.00387    0.0253    2.67
## # … with 1 more variable: p.value <dbl>

Predictive value smallest pairwise effect commercial-lab

predictive.pairwise %>% dplyr::filter(p.value < .05, z.ratio > 0) %>% dplyr::group_by(grain, contrast) %>% filter(z.ratio == min(z.ratio))
## # A tibble: 6 x 10
## # Groups:   grain, contrast [6]
##   contrast    sampleSize grain estimate     SE    df asymp.LCL asymp.UCL z.ratio
##   <fct>       <fct>      <fct>    <dbl>  <dbl> <dbl>     <dbl>     <dbl>   <dbl>
## 1 sample - c… 2          c       0.701  0.0130   Inf   0.676      0.727    54.0 
## 2 sample - r… 2          c       0.809  0.0135   Inf   0.782      0.835    59.7 
## 3 random - c… 4          c       0.0778 0.0119   Inf   0.0544     0.101     6.51
## 4 sample - c… 2          f       0.439  0.0116   Inf   0.416      0.461    37.9 
## 5 sample - r… 2          f       0.513  0.0116   Inf   0.490      0.535    44.3 
## 6 random - c… 4          f       0.0243 0.0115   Inf   0.00183    0.0468    2.12
## # … with 1 more variable: p.value <dbl>

Detection value smallest pairwise effect commercial-lab

detection.pairwise %>% dplyr::filter(p.value < .05, z.ratio > 0) %>% dplyr::group_by(grain, contrast) %>% filter(z.ratio == min(z.ratio))
## # A tibble: 6 x 10
## # Groups:   grain, contrast [6]
##   contrast   sampleSize grain estimate      SE    df asymp.LCL asymp.UCL z.ratio
##   <fct>      <fct>      <fct>    <dbl>   <dbl> <dbl>     <dbl>     <dbl>   <dbl>
## 1 sample - … 2          c       0.340  0.00619   Inf 0.328        0.352    54.8 
## 2 sample - … 2          c       0.453  0.00645   Inf 0.440        0.465    70.2 
## 3 random - … 4          c       0.0226 0.00569   Inf 0.0114       0.0337    3.97
## 4 sample - … 2          f       0.252  0.00551   Inf 0.242        0.263    45.8 
## 5 sample - … 2          f       0.298  0.00551   Inf 0.287        0.309    54.1 
## 6 random - … 24         f       0.0108 0.00547   Inf 0.0000972    0.0215    1.98
## # … with 1 more variable: p.value <dbl>

Predictive value polynomial fit commercial-lab

predictive.poly[predictive.poly$testCond == "sample",]
##     contrast grain testCond  estimate        SE  df asymp.LCL asymp.UCL
## 13    linear     c   sample  33.94804 0.3091606 Inf  33.34209  34.55398
## 14 quadratic     c   sample -27.89693 0.6387635 Inf -29.14888 -26.64498
## 15     cubic     c   sample 104.66649 8.4639687 Inf  88.07742 121.25556
## 16    linear     f   sample  26.84085 0.3000469 Inf  26.25277  27.42894
## 17 quadratic     f   sample -30.26383 0.6153609 Inf -31.46991 -29.05774
## 18     cubic     f   sample 222.88009 8.1725227 Inf 206.86224 238.89794
##      z.ratio       p.value
## 13 109.80711  0.000000e+00
## 14 -43.67333  0.000000e+00
## 15  12.36612  3.985941e-35
## 16  89.45554  0.000000e+00
## 17 -49.18061  0.000000e+00
## 18  27.27188 9.144464e-164

Detection value polynomial fit commercial-lab

detection.poly[detection.poly$testCond == "sample",]
##     contrast grain testCond  estimate        SE  df asymp.LCL asymp.UCL
## 13    linear     c   sample  17.18942 0.1473388 Inf  16.90064  17.47820
## 14 quadratic     c   sample -14.41364 0.3044199 Inf -15.01029 -13.81699
## 15     cubic     c   sample  62.04137 4.0337310 Inf  54.13540  69.94734
## 16    linear     f   sample  15.46568 0.1429954 Inf  15.18541  15.74594
## 17 quadratic     f   sample -17.17950 0.2932667 Inf -17.75430 -16.60471
## 18     cubic     f   sample 128.33832 3.8948342 Inf 120.70459 135.97206
##      z.ratio       p.value
## 13 116.66592  0.000000e+00
## 14 -47.34789  0.000000e+00
## 15  15.38064  2.207490e-53
## 16 108.15509  0.000000e+00
## 17 -58.57980  0.000000e+00
## 18  32.95091 4.105653e-238

#Extra analyses

setwd('../data/raw')
ts.data.commercial <- read.delim("esMethods_Commercial_TimeSeries_Filtered.txt", head = TRUE)
ts.data.evAct <- read.delim("esMethods_EvAct_TimeSeries_Filtered.txt", head = TRUE)
names(ts.data.commercial)[names(ts.data.commercial) == "movName"] <- "movieName"
names(ts.data.evAct)[names(ts.data.evAct) == "movName"] <- "movieName"

Correlate correlation between segmentation of different movies

comm.gp.bp <- ts.data.commercial %>% dplyr::group_by(grain, movieName, timeSeries) %>% dplyr::summarise(bp = mean(subject.bp))
evAct.gp.bp <- ts.data.evAct %>% dplyr::group_by(grain, movieName, timeSeries) %>% dplyr::summarise(bp = mean(subject.bp))

comm.corr.fine <- cor.test(comm.gp.bp$bp[comm.gp.bp$movieName == "3Iron" & comm.gp.bp$grain == "f"],
                           comm.gp.bp$bp[comm.gp.bp$movieName == "Corn" & comm.gp.bp$grain == "f"])

comm.corr.coarse <- cor.test(comm.gp.bp$bp[comm.gp.bp$movieName == "3Iron" & comm.gp.bp$grain == "c"],
                           comm.gp.bp$bp[comm.gp.bp$movieName == "Corn" & comm.gp.bp$grain == "c"])

evAct.corr.fine.bed_coffee <- cor.test(evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_bed.mp4" & evAct.gp.bp$grain == "f"],
                           evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_coffee.mp4" & evAct.gp.bp$grain == "f"])
evAct.corr.fine.bed_dishes <- cor.test(evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_bed.mp4" & evAct.gp.bp$grain == "f"],
                           evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_dishes.mp4" & evAct.gp.bp$grain == "f"])
evAct.corr.fine.bed_laundry <- cor.test(evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_bed.mp4" & evAct.gp.bp$grain == "f"],
                           evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_laundry.mp4" & evAct.gp.bp$grain == "f"])

evAct.corr.coarse.bed_coffee <- cor.test(evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_bed.mp4" & evAct.gp.bp$grain == "c"],
                           evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_coffee.mp4" & evAct.gp.bp$grain == "c"])
evAct.corr.coarse.bed_dishes <- cor.test(evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_bed.mp4" & evAct.gp.bp$grain == "c"],
                           evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_dishes.mp4" & evAct.gp.bp$grain == "c"])
evAct.corr.coarse.bed_laundry <- cor.test(evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_bed.mp4" & evAct.gp.bp$grain == "c"],
                           evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_laundry.mp4" & evAct.gp.bp$grain == "c"])


evAct.corr.fine.coffee_dishes <- cor.test(evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_coffee.mp4" & evAct.gp.bp$grain == "f"],
                           evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_dishes.mp4" & evAct.gp.bp$grain == "f"])
evAct.corr.fine.coffee_laundry <- cor.test(evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_coffee.mp4" & evAct.gp.bp$grain == "f"],
                           evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_laundry.mp4" & evAct.gp.bp$grain == "f"])


evAct.corr.coarse.coffee_dishes <- cor.test(evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_coffee.mp4" & evAct.gp.bp$grain == "c"],
                           evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_dishes.mp4" & evAct.gp.bp$grain == "c"])
evAct.corr.coarse.coffee_laundry <- cor.test(evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_coffee.mp4" & evAct.gp.bp$grain == "c"],
                           evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_laundry.mp4" & evAct.gp.bp$grain == "c"])


evAct.corr.fine.dishes_laundry <- cor.test(evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_dishes.mp4" & evAct.gp.bp$grain == "f"],
                           evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_laundry.mp4" & evAct.gp.bp$grain == "f"])

evAct.corr.coarse.dishes_laundry <- cor.test(evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_dishes.mp4" & evAct.gp.bp$grain == "c"],
                           evAct.gp.bp$bp[evAct.gp.bp$movieName == "w_laundry.mp4" & evAct.gp.bp$grain == "c"])